Diffstat (limited to 'contrib/llvm/lib/CodeGen')
233 files changed, 20206 insertions, 10619 deletions
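Most of the churn below is mechanical: LLVM's DEBUG(...) logging macro was renamed to LLVM_DEBUG(...) and the affected statements re-wrapped. The macro takes a whole statement as its argument and executes it only when debug output is enabled, which is why entire for-loops and if-statements appear inside the parentheses in the hunks that follow. A minimal standalone sketch of the pattern (the MY_DEBUG and DebugEnabled names are hypothetical; LLVM's real macro lives in llvm/Support/Debug.h and additionally checks the -debug-only category):

#include <iostream>

static bool DebugEnabled = false; // stands in for the flag set by -debug

#define MY_DEBUG(X)                                                            \
  do {                                                                         \
    if (DebugEnabled) {                                                        \
      X;                                                                       \
    }                                                                          \
  } while (false)

int main() {
  DebugEnabled = true;
  // The argument is an arbitrary statement, so control flow can be guarded
  // exactly like the LLVM_DEBUG(for (...) ...) hunks in this diff.
  MY_DEBUG(for (int r = 0; r < 3; ++r) std::cout << " reg" << r);
  MY_DEBUG(std::cout << '\n');
  return 0;
}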
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index ffcb9a09ad73..632ea8e9cdc4 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -25,7 +25,6 @@  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/RegisterClassInfo.h"  #include "llvm/CodeGen/ScheduleDAG.h"  #include "llvm/CodeGen/TargetInstrInfo.h" @@ -35,6 +34,7 @@  #include "llvm/MC/MCRegisterInfo.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/raw_ostream.h"  #include <cassert>  #include <map> @@ -139,10 +139,11 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(        CriticalPathSet |= CPSet;     } -  DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); -  DEBUG(for (unsigned r : CriticalPathSet.set_bits()) -          dbgs() << " " << printReg(r, TRI)); -  DEBUG(dbgs() << '\n'); +   LLVM_DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); +   LLVM_DEBUG(for (unsigned r +                   : CriticalPathSet.set_bits()) dbgs() +              << " " << printReg(r, TRI)); +   LLVM_DEBUG(dbgs() << '\n');  }  AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { @@ -202,9 +203,9 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,    PrescanInstruction(MI, Count, PassthruRegs);    ScanInstruction(MI, Count); -  DEBUG(dbgs() << "Observe: "); -  DEBUG(MI.dump()); -  DEBUG(dbgs() << "\tRegs:"); +  LLVM_DEBUG(dbgs() << "Observe: "); +  LLVM_DEBUG(MI.dump()); +  LLVM_DEBUG(dbgs() << "\tRegs:");    std::vector<unsigned> &DefIndices = State->GetDefIndices();    for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { @@ -215,16 +216,16 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,      // conservative location (i.e. the beginning of the previous      // schedule region).      if (State->IsLive(Reg)) { -      DEBUG(if (State->GetGroup(Reg) != 0) -              dbgs() << " " << printReg(Reg, TRI) << "=g" << -                State->GetGroup(Reg) << "->g0(region live-out)"); +      LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() +                 << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg) +                 << "->g0(region live-out)");        State->UnionGroups(Reg, 0);      } else if ((DefIndices[Reg] < InsertPosIndex)                 && (DefIndices[Reg] >= Count)) {        DefIndices[Reg] = Count;      }    } -  DEBUG(dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << '\n');  }  bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI, @@ -313,7 +314,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,    // subregister definitions).    
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)      if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) { -      DEBUG(if (!header && footer) dbgs() << footer); +      LLVM_DEBUG(if (!header && footer) dbgs() << footer);        return;      } @@ -322,9 +323,11 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,      DefIndices[Reg] = ~0u;      RegRefs.erase(Reg);      State->LeaveGroup(Reg); -    DEBUG(if (header) { -        dbgs() << header << printReg(Reg, TRI); header = nullptr; }); -    DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); +    LLVM_DEBUG(if (header) { +      dbgs() << header << printReg(Reg, TRI); +      header = nullptr; +    }); +    LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);      // Repeat for subregisters. Note that we only do this if the superregister      // was not live because otherwise, regardless whether we have an explicit      // use of the subregister, the subregister's contents are needed for the @@ -336,15 +339,17 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,          DefIndices[SubregReg] = ~0u;          RegRefs.erase(SubregReg);          State->LeaveGroup(SubregReg); -        DEBUG(if (header) { -            dbgs() << header << printReg(Reg, TRI); header = nullptr; }); -        DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g" << -              State->GetGroup(SubregReg) << tag); +        LLVM_DEBUG(if (header) { +          dbgs() << header << printReg(Reg, TRI); +          header = nullptr; +        }); +        LLVM_DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g" +                          << State->GetGroup(SubregReg) << tag);        }      }    } -  DEBUG(if (!header && footer) dbgs() << footer); +  LLVM_DEBUG(if (!header && footer) dbgs() << footer);  }  void AggressiveAntiDepBreaker::PrescanInstruction( @@ -367,14 +372,15 @@ void AggressiveAntiDepBreaker::PrescanInstruction(      HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");    } -  DEBUG(dbgs() << "\tDef Groups:"); +  LLVM_DEBUG(dbgs() << "\tDef Groups:");    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {      MachineOperand &MO = MI.getOperand(i);      if (!MO.isReg() || !MO.isDef()) continue;      unsigned Reg = MO.getReg();      if (Reg == 0) continue; -    DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg)); +    LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" +                      << State->GetGroup(Reg));      // If MI's defs have a special allocation requirement, don't allow      // any def registers to be changed. Also assume all registers @@ -383,7 +389,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(      // can tell user specified registers from compiler-specified.      
if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI) ||          MI.isInlineAsm()) { -      DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); +      LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");        State->UnionGroups(Reg, 0);      } @@ -393,8 +399,8 @@        unsigned AliasReg = *AI;        if (State->IsLive(AliasReg)) {          State->UnionGroups(Reg, AliasReg); -        DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " -                     << printReg(AliasReg, TRI) << ")"); +        LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " +                          << printReg(AliasReg, TRI) << ")");        }      } @@ -406,7 +412,7 @@      RegRefs.insert(std::make_pair(Reg, RR));    } -  DEBUG(dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << '\n');    // Scan the register defs for this instruction and update    // live-ranges. @@ -437,7 +443,7 @@  void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,                                                 unsigned Count) { -  DEBUG(dbgs() << "\tUse Groups:"); +  LLVM_DEBUG(dbgs() << "\tUse Groups:");    std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&      RegRefs = State->GetRegRefs(); @@ -448,11 +454,11 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,    // FIXME: The issue with predicated instructions is more complex. We are being    // conservative here because the kill markers cannot be trusted after    // if-conversion: -  // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] +  // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14]    // ... -  // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395] -  // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12] -  // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) +  // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395] +  // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12] +  // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8)    //    // The first R6 kill is not really a kill since it's killed by a predicated    // instruction which may not be executed. The second R6 def may or may not @@ -469,7 +475,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,      unsigned Reg = MO.getReg();      if (Reg == 0) continue; -    DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg)); +    LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" +                      << State->GetGroup(Reg));      // It wasn't previously live but now it is, this is a kill. Forget      // the previous live-range information and start a new live-range      HandleLastUse(Reg, Count, "(last-use)");      if (Special) { -      DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); +      LLVM_DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");        State->UnionGroups(Reg, 0);      } @@ -489,12 +496,12 @@      RegRefs.insert(std::make_pair(Reg, RR));    } -  DEBUG(dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << '\n');    // Form a group of all defs and uses of a KILL instruction to ensure    // that all registers are renamed as a group.
if (MI.isKill()) { -    DEBUG(dbgs() << "\tKill Group:"); +    LLVM_DEBUG(dbgs() << "\tKill Group:");      unsigned FirstReg = 0;      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { @@ -504,15 +511,15 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,        if (Reg == 0) continue;        if (FirstReg != 0) { -        DEBUG(dbgs() << "=" << printReg(Reg, TRI)); +        LLVM_DEBUG(dbgs() << "=" << printReg(Reg, TRI));          State->UnionGroups(FirstReg, Reg);        } else { -        DEBUG(dbgs() << " " << printReg(Reg, TRI)); +        LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));          FirstReg = Reg;        }      } -    DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n'); +    LLVM_DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n');    }  } @@ -535,7 +542,7 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {        BV &= RCBV;      } -    DEBUG(dbgs() << " " << TRI->getRegClassName(RC)); +    LLVM_DEBUG(dbgs() << " " << TRI->getRegClassName(RC));    }    return BV; @@ -562,8 +569,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(    // Find the "superest" register in the group. At the same time,    // collect the BitVector of registers that can be used to rename    // each register. -  DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex -        << ":\n"); +  LLVM_DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex +                    << ":\n");    std::map<unsigned, BitVector> RenameRegisterMap;    unsigned SuperReg = 0;    for (unsigned i = 0, e = Regs.size(); i != e; ++i) { @@ -573,13 +580,13 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(      // If Reg has any references, then collect possible rename regs      if (RegRefs.count(Reg) > 0) { -      DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":"); +      LLVM_DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":");        BitVector &BV = RenameRegisterMap[Reg];        assert(BV.empty());        BV = GetRenameRegisters(Reg); -      DEBUG({ +      LLVM_DEBUG({          dbgs() << " ::";          for (unsigned r : BV.set_bits())            dbgs() << " " << printReg(r, TRI); @@ -625,11 +632,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(    ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(SuperRC);    if (Order.empty()) { -    DEBUG(dbgs() << "\tEmpty Super Regclass!!\n"); +    LLVM_DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");      return false;    } -  DEBUG(dbgs() << "\tFind Registers:"); +  LLVM_DEBUG(dbgs() << "\tFind Registers:");    RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size())); @@ -645,7 +652,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(      // Don't replace a register with itself.      if (NewSuperReg == SuperReg) continue; -    DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':'); +    LLVM_DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':');      RenameMap.clear();      // For each referenced group register (which must be a SuperReg or @@ -662,11 +669,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(            NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);        } -      DEBUG(dbgs() << " " << printReg(NewReg, TRI)); +      LLVM_DEBUG(dbgs() << " " << printReg(NewReg, TRI));        // Check if Reg can be renamed to NewReg.        
if (!RenameRegisterMap[Reg].test(NewReg)) { -        DEBUG(dbgs() << "(no rename)"); +        LLVM_DEBUG(dbgs() << "(no rename)");          goto next_super_reg;        } @@ -675,7 +682,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(        // must also check all aliases of NewReg, because we can't define a        // register when any sub or super is already live.        if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) { -        DEBUG(dbgs() << "(live)"); +        LLVM_DEBUG(dbgs() << "(live)");          goto next_super_reg;        } else {          bool found = false; @@ -683,7 +690,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(            unsigned AliasReg = *AI;            if (State->IsLive(AliasReg) ||                (KillIndices[Reg] > DefIndices[AliasReg])) { -            DEBUG(dbgs() << "(alias " << printReg(AliasReg, TRI) << " live)"); +            LLVM_DEBUG(dbgs() +                       << "(alias " << printReg(AliasReg, TRI) << " live)");              found = true;              break;            } @@ -701,7 +709,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(            continue;          if (UseMI->getOperand(Idx).isEarlyClobber()) { -          DEBUG(dbgs() << "(ec)"); +          LLVM_DEBUG(dbgs() << "(ec)");            goto next_super_reg;          }        } @@ -715,7 +723,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(          MachineInstr *DefMI = Q.second.Operand->getParent();          if (DefMI->readsRegister(NewReg, TRI)) { -          DEBUG(dbgs() << "(ec)"); +          LLVM_DEBUG(dbgs() << "(ec)");            goto next_super_reg;          }        } @@ -728,14 +736,14 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(      // renamed, as recorded in RenameMap.      RenameOrder.erase(SuperRC);      RenameOrder.insert(RenameOrderType::value_type(SuperRC, R)); -    DEBUG(dbgs() << "]\n"); +    LLVM_DEBUG(dbgs() << "]\n");      return true;    next_super_reg: -    DEBUG(dbgs() << ']'); +    LLVM_DEBUG(dbgs() << ']');    } while (R != EndR); -  DEBUG(dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << '\n');    // No registers are free and available!    
return false; @@ -788,13 +796,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(    }  #ifndef NDEBUG -  DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n"); -  DEBUG(dbgs() << "Available regs:"); +  LLVM_DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n"); +  LLVM_DEBUG(dbgs() << "Available regs:");    for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {      if (!State->IsLive(Reg)) -      DEBUG(dbgs() << " " << printReg(Reg, TRI)); +      LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));    } -  DEBUG(dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << '\n');  #endif    BitVector RegAliases(TRI->getNumRegs()); @@ -808,11 +816,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(         I != E; --Count) {      MachineInstr &MI = *--I; -    if (MI.isDebugValue()) +    if (MI.isDebugInstr())        continue; -    DEBUG(dbgs() << "Anti: "); -    DEBUG(MI.dump()); +    LLVM_DEBUG(dbgs() << "Anti: "); +    LLVM_DEBUG(MI.dump());      std::set<unsigned> PassthruRegs;      GetPassthruRegs(MI, PassthruRegs); @@ -848,30 +856,30 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(              (Edge->getKind() != SDep::Output)) continue;          unsigned AntiDepReg = Edge->getReg(); -        DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI)); +        LLVM_DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI));          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");          if (!MRI.isAllocatable(AntiDepReg)) {            // Don't break anti-dependencies on non-allocatable registers. -          DEBUG(dbgs() << " (non-allocatable)\n"); +          LLVM_DEBUG(dbgs() << " (non-allocatable)\n");            continue;          } else if (ExcludeRegs && ExcludeRegs->test(AntiDepReg)) {            // Don't break anti-dependencies for critical path registers            // if not on the critical path -          DEBUG(dbgs() << " (not critical-path)\n"); +          LLVM_DEBUG(dbgs() << " (not critical-path)\n");            continue;          } else if (PassthruRegs.count(AntiDepReg) != 0) {            // If the anti-dep register liveness "passes-thru", then            // don't try to change it. It will be changed along with            // the use if required to break an earlier antidep. 
-          DEBUG(dbgs() << " (passthru)\n"); +          LLVM_DEBUG(dbgs() << " (passthru)\n");            continue;          } else {            // No anti-dep breaking for implicit deps            MachineOperand *AntiDepOp = MI.findRegisterDefOperand(AntiDepReg);            assert(AntiDepOp && "Can't find index for defined register operand");            if (!AntiDepOp || AntiDepOp->isImplicit()) { -            DEBUG(dbgs() << " (implicit)\n"); +            LLVM_DEBUG(dbgs() << " (implicit)\n");              continue;            } @@ -897,13 +905,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(                   PE = PathSU->Preds.end(); P != PE; ++P) {              if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&                  (P->getKind() != SDep::Output)) { -              DEBUG(dbgs() << " (real dependency)\n"); +              LLVM_DEBUG(dbgs() << " (real dependency)\n");                AntiDepReg = 0;                break;              } else if ((P->getSUnit() != NextSU) &&                         (P->getKind() == SDep::Data) &&                         (P->getReg() == AntiDepReg)) { -              DEBUG(dbgs() << " (other dependency)\n"); +              LLVM_DEBUG(dbgs() << " (other dependency)\n");                AntiDepReg = 0;                break;              } @@ -941,17 +949,17 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(          // Determine AntiDepReg's register group.          const unsigned GroupIndex = State->GetGroup(AntiDepReg);          if (GroupIndex == 0) { -          DEBUG(dbgs() << " (zero group)\n"); +          LLVM_DEBUG(dbgs() << " (zero group)\n");            continue;          } -        DEBUG(dbgs() << '\n'); +        LLVM_DEBUG(dbgs() << '\n');          // Look for a suitable register to use to break the anti-dependence.          std::map<unsigned, unsigned> RenameMap;          if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) { -          DEBUG(dbgs() << "\tBreaking anti-dependence edge on " -                       << printReg(AntiDepReg, TRI) << ":"); +          LLVM_DEBUG(dbgs() << "\tBreaking anti-dependence edge on " +                            << printReg(AntiDepReg, TRI) << ":");            // Handle each group register...            for (std::map<unsigned, unsigned>::iterator @@ -959,9 +967,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(              unsigned CurrReg = S->first;              unsigned NewReg = S->second; -            DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->" -                         << printReg(NewReg, TRI) << "(" -                         << RegRefs.count(CurrReg) << " refs)"); +            LLVM_DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->" +                              << printReg(NewReg, TRI) << "(" +                              << RegRefs.count(CurrReg) << " refs)");              // Update the references to the old register CurrReg to              // refer to the new register NewReg. 
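For context on the hunks above (independent of the logging rename): an anti-dependence is a write-after-read hazard, and the CurrReg -> NewReg pairs collected in RenameMap break it by rewriting the later redefinition of a register, plus its subsequent references, to a free register. A toy sketch of that rewrite under assumed data structures (Inst, Code, and RenameMap here are illustrative, not LLVM's MachineInstr API):

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Inst {
  std::vector<std::string> Defs, Uses;
};

int main() {
  // I0: r1 = ...        def of r1
  // I1: ... = r1 + r2   last read of that value
  // I2: r1 = ...        redefinition: anti-dependence from I1 to I2
  std::vector<Inst> Code = {{{"r1"}, {}}, {{}, {"r1", "r2"}}, {{"r1"}, {}}};
  std::map<std::string, std::string> RenameMap = {{"r1", "r3"}}; // r3 is free

  // Rewrite only the redefinition and everything after it; earlier
  // instructions keep the old register, so no live value is clobbered.
  for (size_t I = 2; I < Code.size(); ++I) {
    for (auto &R : Code[I].Defs)
      if (RenameMap.count(R))
        R = RenameMap[R];
    for (auto &R : Code[I].Uses)
      if (RenameMap.count(R))
        R = RenameMap[R];
  }
  std::cout << "I2 now defines " << Code[2].Defs[0] << "\n"; // prints r3
  return 0;
}

With the redefinition renamed, a post-RA scheduler is free to move I2 above I1, which is the point of BreakAntiDependencies.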
@@ -994,7 +1002,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(            }            ++Broken; -          DEBUG(dbgs() << '\n'); +          LLVM_DEBUG(dbgs() << '\n');          }        }      } diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp index 8e8c1d8e08d1..37dcb0be824e 100644 --- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp @@ -39,7 +39,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,      HardHints = true;    rewind(); -  DEBUG({ +  LLVM_DEBUG({      if (!Hints.empty()) {        dbgs() << "hints:";        for (unsigned I = 0, E = Hints.size(); I != E; ++I) diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 0731ae575437..79f11def38f7 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -629,26 +629,26 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,    return true;  } -static void collectFuncletMembers( -    DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet, +static void collectEHScopeMembers( +    DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, int EHScope,      const MachineBasicBlock *MBB) {    SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB};    while (!Worklist.empty()) {      const MachineBasicBlock *Visiting = Worklist.pop_back_val(); -    // Don't follow blocks which start new funclets. +    // Don't follow blocks which start new scopes.      if (Visiting->isEHPad() && Visiting != MBB)        continue; -    // Add this MBB to our funclet. -    auto P = FuncletMembership.insert(std::make_pair(Visiting, Funclet)); +    // Add this MBB to our scope. +    auto P = EHScopeMembership.insert(std::make_pair(Visiting, EHScope));      // Don't revisit blocks.      if (!P.second) { -      assert(P.first->second == Funclet && "MBB is part of two funclets!"); +      assert(P.first->second == EHScope && "MBB is part of two scopes!");        continue;      } -    // Returns are boundaries where funclet transfer can occur, don't follow +    // Returns are boundaries where scope transfer can occur, don't follow      // successors.      if (Visiting->isReturnBlock())        continue; @@ -659,25 +659,25 @@ static void collectFuncletMembers(  }  DenseMap<const MachineBasicBlock *, int> -llvm::getFuncletMembership(const MachineFunction &MF) { -  DenseMap<const MachineBasicBlock *, int> FuncletMembership; +llvm::getEHScopeMembership(const MachineFunction &MF) { +  DenseMap<const MachineBasicBlock *, int> EHScopeMembership;    // We don't have anything to do if there aren't any EH pads. 
-  if (!MF.hasEHFunclets()) -    return FuncletMembership; +  if (!MF.hasEHScopes()) +    return EHScopeMembership;    int EntryBBNumber = MF.front().getNumber();    bool IsSEH = isAsynchronousEHPersonality(        classifyEHPersonality(MF.getFunction().getPersonalityFn()));    const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); -  SmallVector<const MachineBasicBlock *, 16> FuncletBlocks; +  SmallVector<const MachineBasicBlock *, 16> EHScopeBlocks;    SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;    SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;    SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;    for (const MachineBasicBlock &MBB : MF) { -    if (MBB.isEHFuncletEntry()) { -      FuncletBlocks.push_back(&MBB); +    if (MBB.isEHScopeEntry()) { +      EHScopeBlocks.push_back(&MBB);      } else if (IsSEH && MBB.isEHPad()) {        SEHCatchPads.push_back(&MBB);      } else if (MBB.pred_empty()) { @@ -686,8 +686,8 @@ llvm::getFuncletMembership(const MachineFunction &MF) {      MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); -    // CatchPads are not funclets for SEH so do not consider CatchRet to -    // transfer control to another funclet. +    // CatchPads are not scopes for SEH so do not consider CatchRet to +    // transfer control to another scope.      if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode())        continue; @@ -700,24 +700,24 @@ llvm::getFuncletMembership(const MachineFunction &MF) {    }    // We don't have anything to do if there aren't any EH pads. -  if (FuncletBlocks.empty()) -    return FuncletMembership; +  if (EHScopeBlocks.empty()) +    return EHScopeMembership;    // Identify all the basic blocks reachable from the function entry. -  collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front()); -  // All blocks not part of a funclet are in the parent function. +  collectEHScopeMembers(EHScopeMembership, EntryBBNumber, &MF.front()); +  // All blocks not part of a scope are in the parent function.    for (const MachineBasicBlock *MBB : UnreachableBlocks) -    collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); -  // Next, identify all the blocks inside the funclets. -  for (const MachineBasicBlock *MBB : FuncletBlocks) -    collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB); -  // SEH CatchPads aren't really funclets, handle them separately. +    collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB); +  // Next, identify all the blocks inside the scopes. +  for (const MachineBasicBlock *MBB : EHScopeBlocks) +    collectEHScopeMembers(EHScopeMembership, MBB->getNumber(), MBB); +  // SEH CatchPads aren't really scopes, handle them separately.    for (const MachineBasicBlock *MBB : SEHCatchPads) -    collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); +    collectEHScopeMembers(EHScopeMembership, EntryBBNumber, MBB);    // Finally, identify all the targets of a catchret.    
for (std::pair<const MachineBasicBlock *, int> CatchRetPair :         CatchRetSuccessors) -    collectFuncletMembers(FuncletMembership, CatchRetPair.second, +    collectEHScopeMembers(EHScopeMembership, CatchRetPair.second,                            CatchRetPair.first); -  return FuncletMembership; +  return EHScopeMembership;  } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 15cfbd5c40ff..9011f025f595 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -91,7 +91,8 @@ void ARMException::endFunction(const MachineFunction *MF) {      ATS.emitFnEnd();  } -void ARMException::emitTypeInfos(unsigned TTypeEncoding) { +void ARMException::emitTypeInfos(unsigned TTypeEncoding, +                                 MCSymbol *TTBaseLabel) {    const MachineFunction *MF = Asm->MF;    const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();    const std::vector<unsigned> &FilterIds = MF->getFilterIds(); @@ -112,6 +113,8 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) {      Asm->EmitTTypeReference(GV, TTypeEncoding);    } +  Asm->OutStreamer->EmitLabel(TTBaseLabel); +    // Emit the Exception Specifications.    if (VerboseAsm && !FilterIds.empty()) {      Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp new file mode 100644 index 000000000000..20b0b8d3feab --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -0,0 +1,721 @@ +//===- llvm/CodeGen/AsmPrinter/AccelTable.cpp - Accelerator Tables --------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing accelerator tables. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AccelTable.h" +#include "DwarfCompileUnit.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <vector> + +using namespace llvm; + +void AccelTableBase::computeBucketCount() { +  // First get the number of unique hashes. +  std::vector<uint32_t> Uniques; +  Uniques.reserve(Entries.size()); +  for (const auto &E : Entries) +    Uniques.push_back(E.second.HashValue); +  array_pod_sort(Uniques.begin(), Uniques.end()); +  std::vector<uint32_t>::iterator P = +      std::unique(Uniques.begin(), Uniques.end()); + +  UniqueHashCount = std::distance(Uniques.begin(), P); + +  if (UniqueHashCount > 1024) +    BucketCount = UniqueHashCount / 4; +  else if (UniqueHashCount > 16) +    BucketCount = UniqueHashCount / 2; +  else +    BucketCount = std::max<uint32_t>(UniqueHashCount, 1); +} + +void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) { +  // Create the individual hash data outputs. +  for (auto &E : Entries) { +    // Unique the entries. 
+    std::stable_sort(E.second.Values.begin(), E.second.Values.end(), +                     [](const AccelTableData *A, const AccelTableData *B) { +                       return *A < *B; +                     }); +    E.second.Values.erase( +        std::unique(E.second.Values.begin(), E.second.Values.end()), +        E.second.Values.end()); +  } + +  // Figure out how many buckets we need, then compute the bucket contents and +  // the final ordering. The hashes and offsets can be emitted by walking these +  // data structures. We add temporary symbols to the data so they can be +  // referenced when emitting the offsets. +  computeBucketCount(); + +  // Compute bucket contents and final ordering. +  Buckets.resize(BucketCount); +  for (auto &E : Entries) { +    uint32_t Bucket = E.second.HashValue % BucketCount; +    Buckets[Bucket].push_back(&E.second); +    E.second.Sym = Asm->createTempSymbol(Prefix); +  } + +  // Sort the contents of the buckets by hash value so that hash collisions end +  // up together. Stable sort makes testing easier and doesn't cost much more. +  for (auto &Bucket : Buckets) +    std::stable_sort(Bucket.begin(), Bucket.end(), +                     [](HashData *LHS, HashData *RHS) { +                       return LHS->HashValue < RHS->HashValue; +                     }); +} + +namespace { +/// Base class for writing out Accelerator tables. It holds the common +/// functionality for the two Accelerator table types. +class AccelTableWriter { +protected: +  AsmPrinter *const Asm;          ///< Destination. +  const AccelTableBase &Contents; ///< Data to emit. + +  /// Controls whether to emit duplicate hash and offset table entries for names +  /// with identical hashes. Apple tables don't emit duplicate entries, DWARF v5 +  /// tables do. +  const bool SkipIdenticalHashes; + +  void emitHashes() const; + +  /// Emit offsets to lists of entries with identical names. The offsets are +  /// relative to the Base argument. +  void emitOffsets(const MCSymbol *Base) const; + +public: +  AccelTableWriter(AsmPrinter *Asm, const AccelTableBase &Contents, +                   bool SkipIdenticalHashes) +      : Asm(Asm), Contents(Contents), SkipIdenticalHashes(SkipIdenticalHashes) { +  } +}; + +class AppleAccelTableWriter : public AccelTableWriter { +  using Atom = AppleAccelTableData::Atom; + +  /// The fixed header of an Apple Accelerator Table. +  struct Header { +    uint32_t Magic = MagicHash; +    uint16_t Version = 1; +    uint16_t HashFunction = dwarf::DW_hash_function_djb; +    uint32_t BucketCount; +    uint32_t HashCount; +    uint32_t HeaderDataLength; + +    /// 'HASH' magic value to detect endianness. +    static const uint32_t MagicHash = 0x48415348; + +    Header(uint32_t BucketCount, uint32_t UniqueHashCount, uint32_t DataLength) +        : BucketCount(BucketCount), HashCount(UniqueHashCount), +          HeaderDataLength(DataLength) {} + +    void emit(AsmPrinter *Asm) const; +#ifndef NDEBUG +    void print(raw_ostream &OS) const; +    void dump() const { print(dbgs()); } +#endif +  }; + +  /// The HeaderData describes the structure of an Apple accelerator table +  /// through a list of Atoms. +  struct HeaderData { +    /// In the case of data that is referenced via DW_FORM_ref_* the offset +    /// base is used to describe the offset for all forms in the list of atoms. 
+    uint32_t DieOffsetBase; + +    const SmallVector<Atom, 4> Atoms; + +    HeaderData(ArrayRef<Atom> AtomList, uint32_t Offset = 0) +        : DieOffsetBase(Offset), Atoms(AtomList.begin(), AtomList.end()) {} + +    void emit(AsmPrinter *Asm) const; +#ifndef NDEBUG +    void print(raw_ostream &OS) const; +    void dump() const { print(dbgs()); } +#endif +  }; + +  Header Header; +  HeaderData HeaderData; +  const MCSymbol *SecBegin; + +  void emitBuckets() const; +  void emitData() const; + +public: +  AppleAccelTableWriter(AsmPrinter *Asm, const AccelTableBase &Contents, +                        ArrayRef<Atom> Atoms, const MCSymbol *SecBegin) +      : AccelTableWriter(Asm, Contents, true), +        Header(Contents.getBucketCount(), Contents.getUniqueHashCount(), +               8 + (Atoms.size() * 4)), +        HeaderData(Atoms), SecBegin(SecBegin) {} + +  void emit() const; + +#ifndef NDEBUG +  void print(raw_ostream &OS) const; +  void dump() const { print(dbgs()); } +#endif +}; + +/// Class responsible for emitting a DWARF v5 Accelerator Table. The only +/// public function is emit(), which performs the actual emission. +/// +/// The class is templated in its data type. This allows us to emit both dynamic +/// and static data entries. A callback abstracts the logic to provide a CU +/// index for a given entry, which is different per data type, but identical +/// for every entry in the same table. +template <typename DataT> +class Dwarf5AccelTableWriter : public AccelTableWriter { +  struct Header { +    uint32_t UnitLength = 0; +    uint16_t Version = 5; +    uint16_t Padding = 0; +    uint32_t CompUnitCount; +    uint32_t LocalTypeUnitCount = 0; +    uint32_t ForeignTypeUnitCount = 0; +    uint32_t BucketCount; +    uint32_t NameCount; +    uint32_t AbbrevTableSize = 0; +    uint32_t AugmentationStringSize = sizeof(AugmentationString); +    char AugmentationString[8] = {'L', 'L', 'V', 'M', '0', '7', '0', '0'}; + +    Header(uint32_t CompUnitCount, uint32_t BucketCount, uint32_t NameCount) +        : CompUnitCount(CompUnitCount), BucketCount(BucketCount), +          NameCount(NameCount) {} + +    void emit(const Dwarf5AccelTableWriter &Ctx) const; +  }; +  struct AttributeEncoding { +    dwarf::Index Index; +    dwarf::Form Form; +  }; + +  Header Header; +  DenseMap<uint32_t, SmallVector<AttributeEncoding, 2>> Abbreviations; +  ArrayRef<MCSymbol *> CompUnits; +  llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry; +  MCSymbol *ContributionStart = Asm->createTempSymbol("names_start"); +  MCSymbol *ContributionEnd = Asm->createTempSymbol("names_end"); +  MCSymbol *AbbrevStart = Asm->createTempSymbol("names_abbrev_start"); +  MCSymbol *AbbrevEnd = Asm->createTempSymbol("names_abbrev_end"); +  MCSymbol *EntryPool = Asm->createTempSymbol("names_entries"); + +  DenseSet<uint32_t> getUniqueTags() const; + +  // Right now, we emit uniform attributes for all tags.
+  SmallVector<AttributeEncoding, 2> getUniformAttributes() const; + +  void emitCUList() const; +  void emitBuckets() const; +  void emitStringOffsets() const; +  void emitAbbrevs() const; +  void emitEntry(const DataT &Entry) const; +  void emitData() const; + +public: +  Dwarf5AccelTableWriter( +      AsmPrinter *Asm, const AccelTableBase &Contents, +      ArrayRef<MCSymbol *> CompUnits, +      llvm::function_ref<unsigned(const DataT &)> GetCUIndexForEntry); + +  void emit() const; +}; +} // namespace + +void AccelTableWriter::emitHashes() const { +  uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); +  unsigned BucketIdx = 0; +  for (auto &Bucket : Contents.getBuckets()) { +    for (auto &Hash : Bucket) { +      uint32_t HashValue = Hash->HashValue; +      if (SkipIdenticalHashes && PrevHash == HashValue) +        continue; +      Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(BucketIdx)); +      Asm->emitInt32(HashValue); +      PrevHash = HashValue; +    } +    BucketIdx++; +  } +} + +void AccelTableWriter::emitOffsets(const MCSymbol *Base) const { +  const auto &Buckets = Contents.getBuckets(); +  uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); +  for (size_t i = 0, e = Buckets.size(); i < e; ++i) { +    for (auto *Hash : Buckets[i]) { +      uint32_t HashValue = Hash->HashValue; +      if (SkipIdenticalHashes && PrevHash == HashValue) +        continue; +      PrevHash = HashValue; +      Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i)); +      Asm->EmitLabelDifference(Hash->Sym, Base, sizeof(uint32_t)); +    } +  } +} + +void AppleAccelTableWriter::Header::emit(AsmPrinter *Asm) const { +  Asm->OutStreamer->AddComment("Header Magic"); +  Asm->emitInt32(Magic); +  Asm->OutStreamer->AddComment("Header Version"); +  Asm->emitInt16(Version); +  Asm->OutStreamer->AddComment("Header Hash Function"); +  Asm->emitInt16(HashFunction); +  Asm->OutStreamer->AddComment("Header Bucket Count"); +  Asm->emitInt32(BucketCount); +  Asm->OutStreamer->AddComment("Header Hash Count"); +  Asm->emitInt32(HashCount); +  Asm->OutStreamer->AddComment("Header Data Length"); +  Asm->emitInt32(HeaderDataLength); +} + +void AppleAccelTableWriter::HeaderData::emit(AsmPrinter *Asm) const { +  Asm->OutStreamer->AddComment("HeaderData Die Offset Base"); +  Asm->emitInt32(DieOffsetBase); +  Asm->OutStreamer->AddComment("HeaderData Atom Count"); +  Asm->emitInt32(Atoms.size()); + +  for (const Atom &A : Atoms) { +    Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.Type)); +    Asm->emitInt16(A.Type); +    Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.Form)); +    Asm->emitInt16(A.Form); +  } +} + +void AppleAccelTableWriter::emitBuckets() const { +  const auto &Buckets = Contents.getBuckets(); +  unsigned index = 0; +  for (size_t i = 0, e = Buckets.size(); i < e; ++i) { +    Asm->OutStreamer->AddComment("Bucket " + Twine(i)); +    if (!Buckets[i].empty()) +      Asm->emitInt32(index); +    else +      Asm->emitInt32(std::numeric_limits<uint32_t>::max()); +    // Buckets point in the list of hashes, not to the data. Do not increment +    // the index multiple times in case of hash collisions. 
+    uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); +    for (auto *HD : Buckets[i]) { +      uint32_t HashValue = HD->HashValue; +      if (PrevHash != HashValue) +        ++index; +      PrevHash = HashValue; +    } +  } +} + +void AppleAccelTableWriter::emitData() const { +  const auto &Buckets = Contents.getBuckets(); +  for (size_t i = 0, e = Buckets.size(); i < e; ++i) { +    uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); +    for (auto &Hash : Buckets[i]) { +      // Terminate the previous entry if there is no hash collision with the +      // current one. +      if (PrevHash != std::numeric_limits<uint64_t>::max() && +          PrevHash != Hash->HashValue) +        Asm->emitInt32(0); +      // Remember to emit the label for our offset. +      Asm->OutStreamer->EmitLabel(Hash->Sym); +      Asm->OutStreamer->AddComment(Hash->Name.getString()); +      Asm->emitDwarfStringOffset(Hash->Name); +      Asm->OutStreamer->AddComment("Num DIEs"); +      Asm->emitInt32(Hash->Values.size()); +      for (const auto *V : Hash->Values) +        static_cast<const AppleAccelTableData *>(V)->emit(Asm); +      PrevHash = Hash->HashValue; +    } +    // Emit the final end marker for the bucket. +    if (!Buckets[i].empty()) +      Asm->emitInt32(0); +  } +} + +void AppleAccelTableWriter::emit() const { +  Header.emit(Asm); +  HeaderData.emit(Asm); +  emitBuckets(); +  emitHashes(); +  emitOffsets(SecBegin); +  emitData(); +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::Header::emit( +    const Dwarf5AccelTableWriter &Ctx) const { +  assert(CompUnitCount > 0 && "Index must have at least one CU."); + +  AsmPrinter *Asm = Ctx.Asm; +  Asm->OutStreamer->AddComment("Header: unit length"); +  Asm->EmitLabelDifference(Ctx.ContributionEnd, Ctx.ContributionStart, +                           sizeof(uint32_t)); +  Asm->OutStreamer->EmitLabel(Ctx.ContributionStart); +  Asm->OutStreamer->AddComment("Header: version"); +  Asm->emitInt16(Version); +  Asm->OutStreamer->AddComment("Header: padding"); +  Asm->emitInt16(Padding); +  Asm->OutStreamer->AddComment("Header: compilation unit count"); +  Asm->emitInt32(CompUnitCount); +  Asm->OutStreamer->AddComment("Header: local type unit count"); +  Asm->emitInt32(LocalTypeUnitCount); +  Asm->OutStreamer->AddComment("Header: foreign type unit count"); +  Asm->emitInt32(ForeignTypeUnitCount); +  Asm->OutStreamer->AddComment("Header: bucket count"); +  Asm->emitInt32(BucketCount); +  Asm->OutStreamer->AddComment("Header: name count"); +  Asm->emitInt32(NameCount); +  Asm->OutStreamer->AddComment("Header: abbreviation table size"); +  Asm->EmitLabelDifference(Ctx.AbbrevEnd, Ctx.AbbrevStart, sizeof(uint32_t)); +  Asm->OutStreamer->AddComment("Header: augmentation string size"); +  assert(AugmentationStringSize % 4 == 0); +  Asm->emitInt32(AugmentationStringSize); +  Asm->OutStreamer->AddComment("Header: augmentation string"); +  Asm->OutStreamer->EmitBytes({AugmentationString, AugmentationStringSize}); +} + +template <typename DataT> +DenseSet<uint32_t> Dwarf5AccelTableWriter<DataT>::getUniqueTags() const { +  DenseSet<uint32_t> UniqueTags; +  for (auto &Bucket : Contents.getBuckets()) { +    for (auto *Hash : Bucket) { +      for (auto *Value : Hash->Values) { +        unsigned Tag = static_cast<const DataT *>(Value)->getDieTag(); +        UniqueTags.insert(Tag); +      } +    } +  } +  return UniqueTags; +} + +template <typename DataT> +SmallVector<typename Dwarf5AccelTableWriter<DataT>::AttributeEncoding, 2> 
+Dwarf5AccelTableWriter<DataT>::getUniformAttributes() const { +  SmallVector<AttributeEncoding, 2> UA; +  if (CompUnits.size() > 1) { +    size_t LargestCUIndex = CompUnits.size() - 1; +    dwarf::Form Form = DIEInteger::BestForm(/*IsSigned*/ false, LargestCUIndex); +    UA.push_back({dwarf::DW_IDX_compile_unit, Form}); +  } +  UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4}); +  return UA; +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitCUList() const { +  for (const auto &CU : enumerate(CompUnits)) { +    Asm->OutStreamer->AddComment("Compilation unit " + Twine(CU.index())); +    Asm->emitDwarfSymbolReference(CU.value()); +  } +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitBuckets() const { +  uint32_t Index = 1; +  for (const auto &Bucket : enumerate(Contents.getBuckets())) { +    Asm->OutStreamer->AddComment("Bucket " + Twine(Bucket.index())); +    Asm->emitInt32(Bucket.value().empty() ? 0 : Index); +    Index += Bucket.value().size(); +  } +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitStringOffsets() const { +  for (const auto &Bucket : enumerate(Contents.getBuckets())) { +    for (auto *Hash : Bucket.value()) { +      DwarfStringPoolEntryRef String = Hash->Name; +      Asm->OutStreamer->AddComment("String in Bucket " + Twine(Bucket.index()) + +                                   ": " + String.getString()); +      Asm->emitDwarfStringOffset(String); +    } +  } +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const { +  Asm->OutStreamer->EmitLabel(AbbrevStart); +  for (const auto &Abbrev : Abbreviations) { +    Asm->OutStreamer->AddComment("Abbrev code"); +    assert(Abbrev.first != 0); +    Asm->EmitULEB128(Abbrev.first); +    Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first)); +    Asm->EmitULEB128(Abbrev.first); +    for (const auto &AttrEnc : Abbrev.second) { +      Asm->EmitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data()); +      Asm->EmitULEB128(AttrEnc.Form, +                       dwarf::FormEncodingString(AttrEnc.Form).data()); +    } +    Asm->EmitULEB128(0, "End of abbrev"); +    Asm->EmitULEB128(0, "End of abbrev"); +  } +  Asm->EmitULEB128(0, "End of abbrev list"); +  Asm->OutStreamer->EmitLabel(AbbrevEnd); +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const { +  auto AbbrevIt = Abbreviations.find(Entry.getDieTag()); +  assert(AbbrevIt != Abbreviations.end() && +         "Why wasn't this abbrev generated?"); + +  Asm->EmitULEB128(AbbrevIt->first, "Abbreviation code"); +  for (const auto &AttrEnc : AbbrevIt->second) { +    Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index)); +    switch (AttrEnc.Index) { +    case dwarf::DW_IDX_compile_unit: { +      DIEInteger ID(getCUIndexForEntry(Entry)); +      ID.EmitValue(Asm, AttrEnc.Form); +      break; +    } +    case dwarf::DW_IDX_die_offset: +      assert(AttrEnc.Form == dwarf::DW_FORM_ref4); +      Asm->emitInt32(Entry.getDieOffset()); +      break; +    default: +      llvm_unreachable("Unexpected index attribute!"); +    } +  } +} + +template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const { +  Asm->OutStreamer->EmitLabel(EntryPool); +  for (auto &Bucket : Contents.getBuckets()) { +    for (auto *Hash : Bucket) { +      // Remember to emit the label for our offset. 
+      Asm->OutStreamer->EmitLabel(Hash->Sym); +      for (const auto *Value : Hash->Values) +        emitEntry(*static_cast<const DataT *>(Value)); +      Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString()); +      Asm->emitInt32(0); +    } +  } +} + +template <typename DataT> +Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter( +    AsmPrinter *Asm, const AccelTableBase &Contents, +    ArrayRef<MCSymbol *> CompUnits, +    llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry) +    : AccelTableWriter(Asm, Contents, false), +      Header(CompUnits.size(), Contents.getBucketCount(), +             Contents.getUniqueNameCount()), +      CompUnits(CompUnits), getCUIndexForEntry(std::move(getCUIndexForEntry)) { +  DenseSet<uint32_t> UniqueTags = getUniqueTags(); +  SmallVector<AttributeEncoding, 2> UniformAttributes = getUniformAttributes(); + +  Abbreviations.reserve(UniqueTags.size()); +  for (uint32_t Tag : UniqueTags) +    Abbreviations.try_emplace(Tag, UniformAttributes); +} + +template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() const { +  Header.emit(*this); +  emitCUList(); +  emitBuckets(); +  emitHashes(); +  emitStringOffsets(); +  emitOffsets(EntryPool); +  emitAbbrevs(); +  emitData(); +  Asm->OutStreamer->EmitValueToAlignment(4, 0); +  Asm->OutStreamer->EmitLabel(ContributionEnd); +} + +void llvm::emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents, +                                   StringRef Prefix, const MCSymbol *SecBegin, +                                   ArrayRef<AppleAccelTableData::Atom> Atoms) { +  Contents.finalize(Asm, Prefix); +  AppleAccelTableWriter(Asm, Contents, Atoms, SecBegin).emit(); +} + +void llvm::emitDWARF5AccelTable( +    AsmPrinter *Asm, AccelTable<DWARF5AccelTableData> &Contents, +    const DwarfDebug &DD, ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs) { +  std::vector<MCSymbol *> CompUnits; +  for (const auto &CU : enumerate(CUs)) { +    assert(CU.index() == CU.value()->getUniqueID()); +    const DwarfCompileUnit *MainCU = +        DD.useSplitDwarf() ? CU.value()->getSkeleton() : CU.value().get(); +    CompUnits.push_back(MainCU->getLabelBegin()); +  } + +  Contents.finalize(Asm, "names"); +  Dwarf5AccelTableWriter<DWARF5AccelTableData>( +      Asm, Contents, CompUnits, +      [&DD](const DWARF5AccelTableData &Entry) { +        const DIE *CUDie = Entry.getDie().getUnitDie(); +        return DD.lookupCU(CUDie)->getUniqueID(); +      }) +      .emit(); +} + +void llvm::emitDWARF5AccelTable( +    AsmPrinter *Asm, AccelTable<DWARF5AccelTableStaticData> &Contents, +    ArrayRef<MCSymbol *> CUs, +    llvm::function_ref<unsigned(const DWARF5AccelTableStaticData &)> +        getCUIndexForEntry) { +  Contents.finalize(Asm, "names"); +  Dwarf5AccelTableWriter<DWARF5AccelTableStaticData>(Asm, Contents, CUs, +                                                     getCUIndexForEntry) +      .emit(); +} + +void AppleAccelTableOffsetData::emit(AsmPrinter *Asm) const { +  Asm->emitInt32(Die.getDebugSectionOffset()); +} + +void AppleAccelTableTypeData::emit(AsmPrinter *Asm) const { +  Asm->emitInt32(Die.getDebugSectionOffset()); +  Asm->emitInt16(Die.getTag()); +  Asm->emitInt8(0); +} + +void AppleAccelTableStaticOffsetData::emit(AsmPrinter *Asm) const { +  Asm->emitInt32(Offset); +} + +void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const { +  Asm->emitInt32(Offset); +  Asm->emitInt16(Tag); +  Asm->emitInt8(ObjCClassIsImplementation ? 
dwarf::DW_FLAG_type_implementation +                                          : 0); +  Asm->emitInt32(QualifiedNameHash); +} + +#ifndef _MSC_VER +// The lines below are rejected by older versions (TBD) of MSVC. +constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[]; +constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[]; +constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[]; +constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[]; +#else +// FIXME: Erase this path once the minimum MSVC version has been bumped. +const SmallVector<AppleAccelTableData::Atom, 4> +    AppleAccelTableOffsetData::Atoms = { +        Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; +const SmallVector<AppleAccelTableData::Atom, 4> AppleAccelTableTypeData::Atoms = +    {Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), +     Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), +     Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; +const SmallVector<AppleAccelTableData::Atom, 4> +    AppleAccelTableStaticOffsetData::Atoms = { +        Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)}; +const SmallVector<AppleAccelTableData::Atom, 4> +    AppleAccelTableStaticTypeData::Atoms = { +        Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), +        Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), +        Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)}; +#endif + +#ifndef NDEBUG +void AppleAccelTableWriter::Header::print(raw_ostream &OS) const { +  OS << "Magic: " << format("0x%x", Magic) << "\n" +     << "Version: " << Version << "\n" +     << "Hash Function: " << HashFunction << "\n" +     << "Bucket Count: " << BucketCount << "\n" +     << "Header Data Length: " << HeaderDataLength << "\n"; +} + +void AppleAccelTableData::Atom::print(raw_ostream &OS) const { +  OS << "Type: " << dwarf::AtomTypeString(Type) << "\n" +     << "Form: " << dwarf::FormEncodingString(Form) << "\n"; +} + +void AppleAccelTableWriter::HeaderData::print(raw_ostream &OS) const { +  OS << "DIE Offset Base: " << DieOffsetBase << "\n"; +  for (auto Atom : Atoms) +    Atom.print(OS); +} + +void AppleAccelTableWriter::print(raw_ostream &OS) const { +  Header.print(OS); +  HeaderData.print(OS); +  Contents.print(OS); +  SecBegin->print(OS, nullptr); +} + +void AccelTableBase::HashData::print(raw_ostream &OS) const { +  OS << "Name: " << Name.getString() << "\n"; +  OS << "  Hash Value: " << format("0x%x", HashValue) << "\n"; +  OS << "  Symbol: "; +  if (Sym) +    OS << *Sym; +  else +    OS << "<none>"; +  OS << "\n"; +  for (auto *Value : Values) +    Value->print(OS); +} + +void AccelTableBase::print(raw_ostream &OS) const { +  // Print Content.
+  OS << "Entries: \n"; +  for (const auto &Entry : Entries) { +    OS << "Name: " << Entry.first() << "\n"; +    for (auto *V : Entry.second.Values) +      V->print(OS); +  } + +  OS << "Buckets and Hashes: \n"; +  for (auto &Bucket : Buckets) +    for (auto &Hash : Bucket) +      Hash->print(OS); + +  OS << "Data: \n"; +  for (auto &E : Entries) +    E.second.print(OS); +} + +void DWARF5AccelTableData::print(raw_ostream &OS) const { +  OS << "  Offset: " << getDieOffset() << "\n"; +  OS << "  Tag: " << dwarf::TagString(getDieTag()) << "\n"; +} + +void DWARF5AccelTableStaticData::print(raw_ostream &OS) const { +  OS << "  Offset: " << getDieOffset() << "\n"; +  OS << "  Tag: " << dwarf::TagString(getDieTag()) << "\n"; +} + +void AppleAccelTableOffsetData::print(raw_ostream &OS) const { +  OS << "  Offset: " << Die.getOffset() << "\n"; +} + +void AppleAccelTableTypeData::print(raw_ostream &OS) const { +  OS << "  Offset: " << Die.getOffset() << "\n"; +  OS << "  Tag: " << dwarf::TagString(Die.getTag()) << "\n"; +} + +void AppleAccelTableStaticOffsetData::print(raw_ostream &OS) const { +  OS << "  Static Offset: " << Offset << "\n"; +} + +void AppleAccelTableStaticTypeData::print(raw_ostream &OS) const { +  OS << "  Static Offset: " << Offset << "\n"; +  OS << "  QualifiedNameHash: " << format("%x\n", QualifiedNameHash) << "\n"; +  OS << "  Tag: " << dwarf::TagString(Tag) << "\n"; +  OS << "  ObjCClassIsImplementation: " +     << (ObjCClassIsImplementation ? "true" : "false"); +  OS << "\n"; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 59ed0324bdb0..4a226527cb5b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -10,9 +10,9 @@  #include "AddressPool.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include <utility>  using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h index 990a158d87cd..5350006bf744 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h @@ -39,7 +39,7 @@ class AddressPool {  public:    AddressPool() = default; -  /// \brief Returns the index into the address pool with the given +  /// Returns the index into the address pool with the given    /// label/symbol.    
unsigned getIndex(const MCSymbol *Sym, bool TLS = false); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d7995447592c..9bbc77b3056b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -16,6 +16,7 @@  #include "CodeViewDebug.h"  #include "DwarfDebug.h"  #include "DwarfException.h" +#include "WinCFGuard.h"  #include "WinException.h"  #include "llvm/ADT/APFloat.h"  #include "llvm/ADT/APInt.h" @@ -30,7 +31,6 @@  #include "llvm/ADT/Twine.h"  #include "llvm/Analysis/ConstantFolding.h"  #include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/ObjectUtils.h"  #include "llvm/Analysis/OptimizationRemarkEmitter.h"  #include "llvm/BinaryFormat/Dwarf.h"  #include "llvm/BinaryFormat/ELF.h" @@ -39,6 +39,7 @@  #include "llvm/CodeGen/GCStrategy.h"  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h"  #include "llvm/CodeGen/MachineFrameInfo.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineFunctionPass.h" @@ -54,7 +55,6 @@  #include "llvm/CodeGen/TargetFrameLowering.h"  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/CodeGen/TargetOpcodes.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -87,6 +87,7 @@  #include "llvm/MC/MCExpr.h"  #include "llvm/MC/MCInst.h"  #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionCOFF.h"  #include "llvm/MC/MCSectionELF.h"  #include "llvm/MC/MCSectionMachO.h"  #include "llvm/MC/MCStreamer.h" @@ -107,6 +108,7 @@  #include "llvm/Support/TargetRegistry.h"  #include "llvm/Support/Timer.h"  #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include "llvm/Target/TargetMachine.h"  #include "llvm/Target/TargetOptions.h"  #include <algorithm> @@ -130,6 +132,8 @@ static const char *const DbgTimerName = "emit";  static const char *const DbgTimerDescription = "Debug Info Emission";  static const char *const EHTimerName = "write_exception";  static const char *const EHTimerDescription = "DWARF Exception Writer"; +static const char *const CFGuardName = "Control Flow Guard"; +static const char *const CFGuardDescription = "Control Flow Guard Tables";  static const char *const CodeViewLineTablesGroupName = "linetables";  static const char *const CodeViewLineTablesGroupDescription =    "CodeView Line Tables"; @@ -211,8 +215,10 @@ const DataLayout &AsmPrinter::getDataLayout() const {  }  // Do not use the cached DataLayout because some clients use it without a Module -// (llvm-dsymutil, llvm-dwarfdump). +// (dsymutil, llvm-dwarfdump).
+unsigned AsmPrinter::getPointerSize() const { +  return TM.getPointerSize(0); // FIXME: Default address space +}  const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {    assert(MF && "getSubtargetInfo requires a valid MachineFunction!"); @@ -234,7 +240,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {    AU.addRequired<MachineModuleInfo>();    AU.addRequired<MachineOptimizationRemarkEmitterPass>();    AU.addRequired<GCModuleInfo>(); -  AU.addRequired<MachineLoopInfo>();  }  bool AsmPrinter::doInitialization(Module &M) { @@ -246,7 +251,7 @@ bool AsmPrinter::doInitialization(Module &M) {    OutStreamer->InitSections(false); -  // Emit the version-min deplyment target directive if needed. +  // Emit the version-min deployment target directive if needed.    //    // FIXME: If we end up with a collection of these sorts of Darwin-specific    // or ELF-specific things, it may make sense to have a platform helper class @@ -291,8 +296,7 @@ bool AsmPrinter::doInitialization(Module &M) {    if (MAI->doesSupportDebugInformation()) {      bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); -    if (EmitCodeView && (TM.getTargetTriple().isKnownWindowsMSVCEnvironment() || -                         TM.getTargetTriple().isWindowsItaniumEnvironment())) { +    if (EmitCodeView && TM.getTargetTriple().isOSWindows()) {        Handlers.push_back(HandlerInfo(new CodeViewDebug(this),                                       DbgTimerName, DbgTimerDescription,                                       CodeViewLineTablesGroupName, @@ -350,10 +354,20 @@ bool AsmPrinter::doInitialization(Module &M) {        break;      }      break; +  case ExceptionHandling::Wasm: +    // TODO to prevent warning +    break;    }    if (ES)      Handlers.push_back(HandlerInfo(ES, EHTimerName, EHTimerDescription,                                     DWARFGroupName, DWARFGroupDescription)); + +  if (mdconst::extract_or_null<ConstantInt>( +          MMI->getModule()->getModuleFlag("cfguard"))) +    Handlers.push_back(HandlerInfo(new WinCFGuard(this), CFGuardName, +                                   CFGuardDescription, DWARFGroupName, +                                   DWARFGroupDescription)); +    return false;  } @@ -361,7 +375,7 @@ static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) {    if (!MAI.hasWeakDefCanBeHiddenDirective())      return false; -  return canBeOmittedFromSymbolTable(GV); +  return GV->canBeOmittedFromSymbolTable();  }  void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { @@ -416,7 +430,7 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {  /// EmitGlobalVariable - Emit the specified global variable to the .s file.  void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { -  bool IsEmuTLSVar = TM.Options.EmulatedTLS && GV->isThreadLocal(); +  bool IsEmuTLSVar = TM.useEmulatedTLS() && GV->isThreadLocal();    assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) &&           "No emulated TLS variables in the common section"); @@ -898,6 +912,30 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {    return true;  } +/// This method handles the target-independent form of DBG_LABEL, returning +/// true if it was able to do so.  A false return means the target will need +/// to handle MI in EmitInstruction. 
+static bool emitDebugLabelComment(const MachineInstr *MI, AsmPrinter &AP) { +  if (MI->getNumOperands() != 1) +    return false; + +  SmallString<128> Str; +  raw_svector_ostream OS(Str); +  OS << "DEBUG_LABEL: "; + +  const DILabel *V = MI->getDebugLabel(); +  if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) { +    StringRef Name = SP->getName(); +    if (!Name.empty()) +      OS << Name << ":"; +  } +  OS << V->getName(); + +  // NOTE: Want this comment at start of line, don't emit with AddComment. +  AP.OutStreamer->emitRawComment(OS.str()); +  return true; +} +  AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const {    if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&        MF->getFunction().needsUnwindTableEntry()) @@ -952,7 +990,8 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {    if (!MF.getTarget().Options.EmitStackSizeSection)      return; -  MCSection *StackSizeSection = getObjFileLowering().getStackSizesSection(); +  MCSection *StackSizeSection = +      getObjFileLowering().getStackSizesSection(*getCurrentSection());    if (!StackSizeSection)      return; @@ -964,10 +1003,9 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {    OutStreamer->PushSection();    OutStreamer->SwitchSection(StackSizeSection); -  const MCSymbol *FunctionSymbol = getSymbol(&MF.getFunction()); +  const MCSymbol *FunctionSymbol = getFunctionBegin();    uint64_t StackSize = FrameInfo.getStackSize(); -  OutStreamer->EmitValue(MCSymbolRefExpr::create(FunctionSymbol, OutContext), -                         /* size = */ 8); +  OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());    OutStreamer->EmitULEB128IntValue(StackSize);    OutStreamer->PopSection(); @@ -996,6 +1034,24 @@ void AsmPrinter::EmitFunctionBody() {    bool ShouldPrintDebugScopes = MMI->hasDebugInfo(); +  if (isVerbose()) { +    // Get MachineDominatorTree or compute it on the fly if it's unavailable +    MDT = getAnalysisIfAvailable<MachineDominatorTree>(); +    if (!MDT) { +      OwnedMDT = make_unique<MachineDominatorTree>(); +      OwnedMDT->getBase().recalculate(*MF); +      MDT = OwnedMDT.get(); +    } + +    // Get MachineLoopInfo or compute it on the fly if it's unavailable +    MLI = getAnalysisIfAvailable<MachineLoopInfo>(); +    if (!MLI) { +      OwnedMLI = make_unique<MachineLoopInfo>(); +      OwnedMLI->getBase().analyze(MDT->getBase()); +      MLI = OwnedMLI.get(); +    } +  } +    // Print out code for the function.    bool HasAnyRealCode = false;    int NumInstsInFunction = 0; @@ -1005,7 +1061,7 @@ void AsmPrinter::EmitFunctionBody() {      for (auto &MI : MBB) {        // Print the assembly for the instruction.        if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && -          !MI.isDebugValue()) { +          !MI.isDebugInstr()) {          HasAnyRealCode = true;          ++NumInstsInFunction;        } @@ -1044,6 +1100,12 @@ void AsmPrinter::EmitFunctionBody() {              EmitInstruction(&MI);          }          break; +      case TargetOpcode::DBG_LABEL: +        if (isVerbose()) { +          if (!emitDebugLabelComment(&MI, *this)) +            EmitInstruction(&MI); +        } +        break;        case TargetOpcode::IMPLICIT_DEF:          if (isVerbose()) emitImplicitDef(&MI);          break; @@ -1155,7 +1217,7 @@ void AsmPrinter::EmitFunctionBody() {    OutStreamer->AddBlankLine();  } -/// \brief Compute the number of Global Variables that uses a Constant. 
+/// Compute the number of Global Variables that uses a Constant.  static unsigned getNumGlobalVariableUses(const Constant *C) {    if (!C)      return 0; @@ -1170,7 +1232,7 @@ static unsigned getNumGlobalVariableUses(const Constant *C) {    return NumUses;  } -/// \brief Only consider global GOT equivalents if at least one user is a +/// Only consider global GOT equivalents if at least one user is a  /// cstexpr inside an initializer of another global variables. Also, don't  /// handle cstexpr inside instructions. During global variable emission,  /// candidates are skipped and are emitted later in case at least one cstexpr @@ -1193,7 +1255,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV,    return NumGOTEquivUsers > 0;  } -/// \brief Unnamed constant global variables solely contaning a pointer to +/// Unnamed constant global variables solely contaning a pointer to  /// another globals variable is equivalent to a GOT table entry; it contains the  /// the address of another symbol. Optimize it and replace accesses to these  /// "GOT equivalents" by using the GOT entry for the final global instead. @@ -1214,7 +1276,7 @@ void AsmPrinter::computeGlobalGOTEquivs(Module &M) {    }  } -/// \brief Constant expressions using GOT equivalent globals may not be eligible +/// Constant expressions using GOT equivalent globals may not be eligible  /// for PC relative GOT entry conversion, in such cases we need to emit such  /// globals we previously omitted in EmitGlobalVariable.  void AsmPrinter::emitGlobalGOTEquivs() { @@ -1312,7 +1374,7 @@ bool AsmPrinter::doFinalization(Module &M) {    const TargetLoweringObjectFile &TLOF = getObjFileLowering(); -  TLOF.emitModuleMetadata(*OutStreamer, M, TM); +  TLOF.emitModuleMetadata(*OutStreamer, M);    if (TM.getTargetTriple().isOSBinFormatELF()) {      MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); @@ -1323,6 +1385,7 @@ bool AsmPrinter::doFinalization(Module &M) {        OutStreamer->SwitchSection(TLOF.getDataSection());        const DataLayout &DL = M.getDataLayout(); +      EmitAlignment(Log2_32(DL.getPointerSize()));        for (const auto &Stub : Stubs) {          OutStreamer->EmitLabel(Stub.first);          OutStreamer->EmitSymbolValue(Stub.second.getPointer(), @@ -1421,6 +1484,61 @@ bool AsmPrinter::doFinalization(Module &M) {      if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))        OutStreamer->SwitchSection(S); +  if (TM.getTargetTriple().isOSBinFormatCOFF()) { +    // Emit /EXPORT: flags for each exported global as necessary. +    const auto &TLOF = getObjFileLowering(); +    std::string Flags; + +    for (const GlobalValue &GV : M.global_values()) { +      raw_string_ostream OS(Flags); +      TLOF.emitLinkerFlagsForGlobal(OS, &GV); +      OS.flush(); +      if (!Flags.empty()) { +        OutStreamer->SwitchSection(TLOF.getDrectveSection()); +        OutStreamer->EmitBytes(Flags); +      } +      Flags.clear(); +    } + +    // Emit /INCLUDE: flags for each used global as necessary. 
+    if (const auto *LU = M.getNamedGlobal("llvm.used")) { +      assert(LU->hasInitializer() && +             "expected llvm.used to have an initializer"); +      assert(isa<ArrayType>(LU->getValueType()) && +             "expected llvm.used to be an array type"); +      if (const auto *A = cast<ConstantArray>(LU->getInitializer())) { +        for (const Value *Op : A->operands()) { +          const auto *GV = +              cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases()); +          // Global symbols with internal or private linkage are not visible to +          // the linker, and thus would cause an error when the linker tried to +          // preserve the symbol due to the `/include:` directive. +          if (GV->hasLocalLinkage()) +            continue; + +          raw_string_ostream OS(Flags); +          TLOF.emitLinkerFlagsForUsed(OS, GV); +          OS.flush(); + +          if (!Flags.empty()) { +            OutStreamer->SwitchSection(TLOF.getDrectveSection()); +            OutStreamer->EmitBytes(Flags); +          } +          Flags.clear(); +        } +      } +    } +  } + +  if (TM.Options.EmitAddrsig) { +    // Emit address-significance attributes for all globals. +    OutStreamer->EmitAddrsig(); +    for (const GlobalValue &GV : M.global_values()) +      if (!GV.isThreadLocal() && !GV.getName().startswith("llvm.") && +          !GV.hasAtLeastLocalUnnamedAddr()) +        OutStreamer->EmitAddrsigSym(getSymbol(&GV)); +  } +    // Allow the target to emit any magic that it wants at the end of the file,    // after everything else has gone out.    EmitEndOfAsmFile(M); @@ -1429,6 +1547,8 @@ bool AsmPrinter::doFinalization(Module &M) {    OutStreamer->Finish();    OutStreamer->reset(); +  OwnedMLI.reset(); +  OwnedMDT.reset();    return false;  } @@ -1447,14 +1567,14 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {    CurrentFnBegin = nullptr;    CurExceptionSym = nullptr;    bool NeedsLocalForSize = MAI->needsLocalForSize(); -  if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize) { +  if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize || +      MF.getTarget().Options.EmitStackSizeSection) {      CurrentFnBegin = createTempSymbol("func_begin");      if (NeedsLocalForSize)        CurrentFnSymForSize = CurrentFnBegin;    }    ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); -  LI = &getAnalysis<MachineLoopInfo>();    const TargetSubtargetInfo &STI = MF.getSubtarget();    EnablePrintSchedInfo = PrintSchedule.getNumOccurrences() @@ -1842,22 +1962,27 @@ void AsmPrinter::EmitModuleIdents(Module &M) {  // Emission and print routines  // -/// EmitInt8 - Emit a byte directive and value. +/// Emit a byte directive and value.  /// -void AsmPrinter::EmitInt8(int Value) const { +void AsmPrinter::emitInt8(int Value) const {    OutStreamer->EmitIntValue(Value, 1);  } -/// EmitInt16 - Emit a short directive and value. -void AsmPrinter::EmitInt16(int Value) const { +/// Emit a short directive and value. +void AsmPrinter::emitInt16(int Value) const {    OutStreamer->EmitIntValue(Value, 2);  } -/// EmitInt32 - Emit a long directive and value. -void AsmPrinter::EmitInt32(int Value) const { +/// Emit a long directive and value. +void AsmPrinter::emitInt32(int Value) const {    OutStreamer->EmitIntValue(Value, 4);  } +/// Emit a long long directive and value. 
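// Editor's illustration - the lower-cased emitIntN helpers renamed above
// compose into fixed-layout records; this 7-byte blob is invented for the
// example:
static void emitBlobSketch(llvm::AsmPrinter &AP) {
  AP.emitInt8(0x2a);        // 1-byte tag
  AP.emitInt16(0x0102);     // 2-byte version field
  AP.emitInt32(0xdeadbeef); // 4-byte payload
  // emitInt64 (defined just below) completes the family for 8-byte values.
}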
+void AsmPrinter::emitInt64(uint64_t Value) const { +  OutStreamer->EmitIntValue(Value, 8); +} +  /// Emit something like ".long Hi-Lo" where the size in bytes of the directive  /// is specified by Size and Hi/Lo specify the labels. This implicitly uses  /// .set if it avoids relocations. @@ -2069,6 +2194,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C,                                     uint64_t Offset = 0);  static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP); +static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP);  /// isRepeatedByteSequence - Determine whether the given value is  /// composed of a repeated sequence of identical bytes and return the @@ -2146,13 +2272,15 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,                                     ElementByteSize);      }    } else { +    Type *ET = CDS->getElementType();      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) -      emitGlobalConstantFP(cast<ConstantFP>(CDS->getElementAsConstant(I)), AP); +      emitGlobalConstantFP(CDS->getElementAsAPFloat(I), ET, AP);    }    unsigned Size = DL.getTypeAllocSize(CDS->getType());    unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *                          CDS->getNumElements(); +  assert(EmittedSize <= Size && "Size cannot be less than EmittedSize!");    if (unsigned Padding = Size - EmittedSize)      AP.OutStreamer->EmitZeros(Padding);  } @@ -2216,17 +2344,17 @@ static void emitGlobalConstantStruct(const DataLayout &DL,           "Layout of constant struct may be incorrect!");  } -static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { -  APInt API = CFP->getValueAPF().bitcastToAPInt(); +static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) { +  APInt API = APF.bitcastToAPInt();    // First print a comment with what we think the original floating-point value    // should have been.    if (AP.isVerbose()) {      SmallString<8> StrVal; -    CFP->getValueAPF().toString(StrVal); +    APF.toString(StrVal); -    if (CFP->getType()) -      CFP->getType()->print(AP.OutStreamer->GetCommentOS()); +    if (ET) +      ET->print(AP.OutStreamer->GetCommentOS());      else        AP.OutStreamer->GetCommentOS() << "Printing <null> Type";      AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n'; @@ -2241,7 +2369,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {    // PPC's long double has odd notions of endianness compared to how LLVM    // handles it: p[0] goes first for *big* endian on PPC. -  if (AP.getDataLayout().isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) { +  if (AP.getDataLayout().isBigEndian() && !ET->isPPC_FP128Ty()) {      int Chunk = API.getNumWords() - 1;      if (TrailingBytes) @@ -2260,8 +2388,11 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {    // Emit the tail padding for the long double.    
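// Editor's note - where that tail padding comes from for x86 long double:
// the 80-bit payload stores in 10 bytes but occupies a 16-byte ABI slot, so
// 6 zero bytes follow it. A standalone check (the layout string is a
// hypothetical example):
static void fp80PaddingSketch(llvm::LLVMContext &Ctx) {
  llvm::DataLayout DL("e-f80:128:128");
  llvm::Type *T = llvm::Type::getX86_FP80Ty(Ctx);
  uint64_t Pad = DL.getTypeAllocSize(T) - DL.getTypeStoreSize(T); // 16 - 10
  (void)Pad; // == 6, the count handed to EmitZeros above
}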
const DataLayout &DL = AP.getDataLayout();
-  AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
-                            DL.getTypeStoreSize(CFP->getType()));
+  AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(ET) - DL.getTypeStoreSize(ET));
+}
+
+static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
+  emitGlobalConstantFP(CFP->getValueAPF(), CFP->getType(), AP);
 }
 
 static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
@@ -2320,7 +2451,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
   }
 }
 
-/// \brief Transform a not absolute MCExpr containing a reference to a GOT
+/// Transform a non-absolute MCExpr containing a reference to a GOT
 /// equivalent global, by a target specific GOT pc relative access to the
 /// final symbol.
 static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
@@ -2533,6 +2664,25 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
 
 /// GetCPISymbol - Return the symbol for the specified constant pool entry.
 MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
+  if (getSubtargetInfo().getTargetTriple().isKnownWindowsMSVCEnvironment()) {
+    const MachineConstantPoolEntry &CPE =
+        MF->getConstantPool()->getConstants()[CPID];
+    if (!CPE.isMachineConstantPoolEntry()) {
+      const DataLayout &DL = MF->getDataLayout();
+      SectionKind Kind = CPE.getSectionKind(&DL);
+      const Constant *C = CPE.Val.ConstVal;
+      unsigned Align = CPE.Alignment;
+      if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>(
+              getObjFileLowering().getSectionForConstant(DL, Kind, C, Align))) {
+        if (MCSymbol *Sym = S->getCOMDATSymbol()) {
+          if (Sym->isUndefined())
+            OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global);
+          return Sym;
+        }
+      }
+    }
+  }
+
   const DataLayout &DL = getDataLayout();
   return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
                                       "CPI" + Twine(getFunctionNumber()) + "_" +
@@ -2631,13 +2781,9 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
 void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
                                          MCCodePaddingContext &Context) const {
   assert(MF != nullptr && "Machine function must be valid");
-  assert(LI != nullptr && "Loop info must be valid");
   Context.IsPaddingActive = !MF->hasInlineAsm() &&
                             !MF->getFunction().optForSize() &&
                             TM.getOptLevel() != CodeGenOpt::None;
-  const MachineLoop *CurrentLoop = LI->getLoopFor(&MBB);
-  Context.IsBasicBlockInsideInnermostLoop =
-      CurrentLoop != nullptr && CurrentLoop->getSubLoops().empty();
   Context.IsBasicBlockReachableViaFallthrough =
       std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) !=
       MBB.pred_end();
@@ -2689,7 +2835,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
         OutStreamer->GetCommentOS() << '\n';
       }
     }
-    emitBasicBlockLoopComments(MBB, LI, *this);
+
+    assert(MLI != nullptr && "MachineLoopInfo should have been computed");
+    emitBasicBlockLoopComments(MBB, MLI, *this);
   }
 
   // Print the main label for the block.
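// Editor's note on the MachineLoopInfo changes above: AsmPrinter no longer
// forces the analysis through addRequired(); in verbose mode it reuses a
// cached result when some earlier pass produced one and otherwise computes a
// private copy. A compilable sketch of that fallback pattern (function name
// invented; mirrors the OwnedMDT/OwnedMLI logic in EmitFunctionBody):
static llvm::MachineLoopInfo *
getOrComputeLoopsSketch(llvm::MachineLoopInfo *Cached,
                        llvm::MachineDominatorTree &MDT,
                        std::unique_ptr<llvm::MachineLoopInfo> &Owned) {
  if (Cached)
    return Cached;                           // pass pipeline already has it
  Owned = llvm::make_unique<llvm::MachineLoopInfo>();
  Owned->getBase().analyze(MDT.getBase());   // loops derive from dominators
  return Owned.get();
}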
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 08eb14e242c5..605588470670 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -17,7 +17,6 @@  #include "llvm/CodeGen/AsmPrinter.h"  #include "llvm/CodeGen/DIE.h"  #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/MC/MCAsmInfo.h"  #include "llvm/MC/MCRegisterInfo.h" @@ -26,6 +25,7 @@  #include "llvm/MC/MCSymbol.h"  #include "llvm/MC/MachineLocation.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include "llvm/Target/TargetMachine.h"  using namespace llvm; @@ -43,15 +43,6 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const {    OutStreamer->EmitSLEB128IntValue(Value);  } -/// EmitULEB128 - emit the specified unsigned leb128 value. -void AsmPrinter::EmitPaddedULEB128(uint64_t Value, unsigned PadTo, -                                   const char *Desc) const { -  if (isVerbose() && Desc) -    OutStreamer->AddComment(Desc); - -  OutStreamer->EmitPaddedULEB128IntValue(Value, PadTo); -} -  void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const {    if (isVerbose() && Desc)      OutStreamer->AddComment(Desc); @@ -59,6 +50,12 @@ void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const {    OutStreamer->EmitULEB128IntValue(Value);  } +/// Emit something like ".uleb128 Hi-Lo". +void AsmPrinter::EmitLabelDifferenceAsULEB128(const MCSymbol *Hi, +                                              const MCSymbol *Lo) const { +  OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo); +} +  static const char *DecodeDWARFEncoding(unsigned Encoding) {    switch (Encoding) {    case dwarf::DW_EH_PE_absptr: @@ -67,6 +64,10 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) {      return "omit";    case dwarf::DW_EH_PE_pcrel:      return "pcrel"; +  case dwarf::DW_EH_PE_uleb128: +    return "uleb128"; +  case dwarf::DW_EH_PE_sleb128: +    return "sleb128";    case dwarf::DW_EH_PE_udata4:      return "udata4";    case dwarf::DW_EH_PE_udata8: @@ -167,14 +168,19 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,    EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);  } -void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { +void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const {    if (MAI->doesDwarfUseRelocationsAcrossSections()) { -    emitDwarfSymbolReference(S.getSymbol()); +    assert(S.Symbol && "No symbol available"); +    emitDwarfSymbolReference(S.Symbol);      return;    }    // Just emit the offset directly; no need for symbol math. 
-  EmitInt32(S.getOffset()); +  emitInt32(S.Offset); +} + +void AsmPrinter::EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const { +  EmitLabelPlusOffset(Label, Offset, MAI->getCodePointerSize());  }  //===----------------------------------------------------------------------===// @@ -252,7 +258,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const {        emitDwarfDIE(Child);      OutStreamer->AddComment("End Of Children Mark"); -    EmitInt8(0); +    emitInt8(0);    }  } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h index 638226e90a7a..f5ac95a20b10 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -27,29 +27,29 @@ class MCSymbol;  typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm); -/// \brief Collects and handles AsmPrinter objects required to build debug +/// Collects and handles AsmPrinter objects required to build debug  /// or EH information.  class AsmPrinterHandler {  public:    virtual ~AsmPrinterHandler(); -  /// \brief For symbols that have a size designated (e.g. common symbols), +  /// For symbols that have a size designated (e.g. common symbols),    /// this tracks that size.    virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0; -  /// \brief Emit all sections that should come after the content. +  /// Emit all sections that should come after the content.    virtual void endModule() = 0; -  /// \brief Gather pre-function debug information. +  /// Gather pre-function debug information.    /// Every beginFunction(MF) call should be followed by an endFunction(MF)    /// call.    virtual void beginFunction(const MachineFunction *MF) = 0; -  // \brief Emit any of function marker (like .cfi_endproc). This is called +  // Emit any of function marker (like .cfi_endproc). This is called    // before endFunction and cannot switch sections.    virtual void markFunctionEnd(); -  /// \brief Gather post-function debug information. +  /// Gather post-function debug information.    /// Please note that some AsmPrinter implementations may not call    /// beginFunction at all.    virtual void endFunction(const MachineFunction *MF) = 0; @@ -58,15 +58,15 @@ public:                               ExceptionSymbolProvider ESP) {}    virtual void endFragment() {} -  /// \brief Emit target-specific EH funclet machinery. +  /// Emit target-specific EH funclet machinery.    virtual void beginFunclet(const MachineBasicBlock &MBB,                              MCSymbol *Sym = nullptr) {}    virtual void endFunclet() {} -  /// \brief Process beginning of an instruction. +  /// Process beginning of an instruction.    virtual void beginInstruction(const MachineInstr *MI) = 0; -  /// \brief Process end of an instruction. +  /// Process end of an instruction.    virtual void endInstruction() = 0;  };  } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 04a72ba3d738..4159eb19423a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -132,6 +132,9 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,    std::unique_ptr<MCAsmParser> Parser(        createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum)); +  // Do not use assembler-level information for parsing inline assembly. 
+  OutStreamer->setUseAssemblerInfoForParsing(false); +    // We create a new MCInstrInfo here since we might be at the module level    // and not have a MachineFunction to initialize the TargetInstrInfo from and    // we only need MCInstrInfo for asm parsing. We create one unconditionally diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h index aaf6180c9404..2163cc7e3e11 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -43,7 +43,7 @@ public:    APByteStreamer(AsmPrinter &Asm) : AP(Asm) {}    void EmitInt8(uint8_t Byte, const Twine &Comment) override {      AP.OutStreamer->AddComment(Comment); -    AP.EmitInt8(Byte); +    AP.emitInt8(Byte);    }    void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {      AP.OutStreamer->AddComment(Comment); @@ -76,7 +76,7 @@ private:    SmallVectorImpl<char> &Buffer;    SmallVectorImpl<std::string> &Comments; -  /// \brief Only verbose textual output needs comments.  This will be set to +  /// Only verbose textual output needs comments.  This will be set to    /// true for that case, and false otherwise.  If false, comments passed in to    /// the emit methods will be ignored.    bool GenerateComments; @@ -93,15 +93,27 @@ public:    }    void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {      raw_svector_ostream OSE(Buffer); -    encodeSLEB128(DWord, OSE); -    if (GenerateComments) +    unsigned Length = encodeSLEB128(DWord, OSE); +    if (GenerateComments) {        Comments.push_back(Comment.str()); +      // Add some empty comments to keep the Buffer and Comments vectors aligned +      // with each other. +      for (size_t i = 1; i < Length; ++i) +        Comments.push_back(""); + +    }    }    void EmitULEB128(uint64_t DWord, const Twine &Comment) override {      raw_svector_ostream OSE(Buffer); -    encodeULEB128(DWord, OSE); -    if (GenerateComments) +    unsigned Length = encodeULEB128(DWord, OSE); +    if (GenerateComments) {        Comments.push_back(Comment.str()); +      // Add some empty comments to keep the Buffer and Comments vectors aligned +      // with each other. +      for (size_t i = 1; i < Length; ++i) +        Comments.push_back(""); + +    }    }  }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 1d0a003dc50a..8c5c5478d01a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -36,7 +36,6 @@  #include "llvm/CodeGen/MachineModuleInfo.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/TargetFrameLowering.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/Config/llvm-config.h" @@ -75,6 +74,7 @@  #include "llvm/Support/FormatVariadic.h"  #include "llvm/Support/SMLoc.h"  #include "llvm/Support/ScopedPrinter.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include "llvm/Target/TargetMachine.h"  #include <algorithm>  #include <cassert> @@ -114,6 +114,16 @@ StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {    StringRef Dir = File->getDirectory(), Filename = File->getFilename(); +  // If this is a Unix-style path, just use it as is. Don't try to canonicalize +  // it textually because one of the path components could be a symlink. 
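// Editor's note on the ByteStreamer change above: EmitULEB128/EmitSLEB128 now
// pad Comments with empty strings so comment i stays paired with buffer byte
// i. For reference, a minimal encoder equivalent in spirit to
// llvm::encodeULEB128 (sketch; helper name invented):
static unsigned encodeULEB128Sketch(uint64_t V,
                                    llvm::SmallVectorImpl<char> &Out) {
  unsigned Count = 0;
  do {
    uint8_t Byte = V & 0x7f;                  // low seven payload bits
    V >>= 7;
    if (V != 0)
      Byte |= 0x80;                           // continuation bit set
    Out.push_back(static_cast<char>(Byte));
    ++Count;
  } while (V != 0);
  return Count; // e.g. 624485 encodes as 0xE5 0x8E 0x26, so Count == 3
}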
+  if (!Dir.empty() && Dir[0] == '/') { +    Filepath = Dir; +    if (Dir.back() != '/') +      Filepath += '/'; +    Filepath += Filename; +    return Filepath; +  } +    // Clang emits directory and relative filename info into the IR, but CodeView    // operates on full paths.  We could change Clang to emit full paths too, but    // that would increase the IR size and probably not needed for other users. @@ -165,14 +175,21 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {    auto Insertion = FileIdMap.insert(std::make_pair(FullPath, NextId));    if (Insertion.second) {      // We have to compute the full filepath and emit a .cv_file directive. -    std::string Checksum = fromHex(F->getChecksum()); -    void *CKMem = OS.getContext().allocate(Checksum.size(), 1); -    memcpy(CKMem, Checksum.data(), Checksum.size()); -    ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem), -                                      Checksum.size()); -    DIFile::ChecksumKind ChecksumKind = F->getChecksumKind(); +    ArrayRef<uint8_t> ChecksumAsBytes; +    FileChecksumKind CSKind = FileChecksumKind::None; +    if (F->getChecksum()) { +      std::string Checksum = fromHex(F->getChecksum()->Value); +      void *CKMem = OS.getContext().allocate(Checksum.size(), 1); +      memcpy(CKMem, Checksum.data(), Checksum.size()); +      ChecksumAsBytes = ArrayRef<uint8_t>( +          reinterpret_cast<const uint8_t *>(CKMem), Checksum.size()); +      switch (F->getChecksum()->Kind) { +      case DIFile::CSK_MD5:  CSKind = FileChecksumKind::MD5; break; +      case DIFile::CSK_SHA1: CSKind = FileChecksumKind::SHA1; break; +      } +    }      bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes, -                                          static_cast<unsigned>(ChecksumKind)); +                                          static_cast<unsigned>(CSKind));      (void)Success;      assert(Success && ".cv_file directive failed");    } @@ -358,15 +375,15 @@ unsigned CodeViewDebug::getPointerSizeInBytes() {  }  void CodeViewDebug::recordLocalVariable(LocalVariable &&Var, -                                        const DILocation *InlinedAt) { -  if (InlinedAt) { +                                        const LexicalScope *LS) { +  if (const DILocation *InlinedAt = LS->getInlinedAt()) {      // This variable was inlined. Associate it with the InlineSite.      const DISubprogram *Inlinee = Var.DIVar->getScope()->getSubprogram();      InlineSite &Site = getInlineSite(InlinedAt, Inlinee);      Site.InlinedLocals.emplace_back(Var);    } else { -    // This variable goes in the main ProcSym. -    CurFn->Locals.emplace_back(Var); +    // This variable goes into the corresponding lexical scope. +    ScopeVariables[LS].emplace_back(Var);    }  } @@ -463,7 +480,7 @@ void CodeViewDebug::endModule() {    // Emit per-function debug information.    for (auto &P : FnDebugInfo)      if (!P.first->isDeclarationForLinker()) -      emitDebugInfoForFunction(P.first, P.second); +      emitDebugInfoForFunction(P.first, *P.second);    // Emit global variable debug information.    setCurrentSubprogram(nullptr); @@ -501,12 +518,12 @@ void CodeViewDebug::endModule() {    clear();  } -static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) { +static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S, +    unsigned MaxFixedRecordLength = 0xF00) {    // The maximum CV record length is 0xFF00. Most of the strings we emit appear    // after a fixed length portion of the record. 
The fixed length portion should    // always be less than 0xF00 (3840) bytes, so truncate the string so that the    // overall record size is less than the maximum allowed. -  unsigned MaxFixedRecordLength = 0xF00;    SmallString<32> NullTerminatedString(        S.take_front(MaxRecordLength - MaxFixedRecordLength - 1));    NullTerminatedString.push_back('\0'); @@ -517,7 +534,7 @@ void CodeViewDebug::emitTypeInformation() {    if (TypeTable.empty())      return; -  // Start the .debug$T section with 0x4. +  // Start the .debug$T or .debug$P section with 0x4.    OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());    emitCodeViewMagicVersion(); @@ -572,7 +589,7 @@ void CodeViewDebug::emitTypeGlobalHashes() {    OS.AddComment("Section Version");    OS.EmitIntValue(0, 2);    OS.AddComment("Hash Algorithm"); -  OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1), 2); +  OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1_8), 2);    TypeIndex TI(TypeIndex::FirstNonSimpleIndex);    for (const auto &GHR : TypeTable.hashes()) { @@ -585,7 +602,7 @@ void CodeViewDebug::emitTypeGlobalHashes() {        OS.AddComment(Comment);        ++TI;      } -    assert(GHR.Hash.size() % 20 == 0); +    assert(GHR.Hash.size() == 8);      StringRef S(reinterpret_cast<const char *>(GHR.Hash.data()),                  GHR.Hash.size());      OS.EmitBinaryData(S); @@ -821,10 +838,61 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {      emitCodeViewMagicVersion();  } +// Emit an S_THUNK32/S_END symbol pair for a thunk routine. +// The only supported thunk ordinal is currently the standard type. +void CodeViewDebug::emitDebugInfoForThunk(const Function *GV, +                                          FunctionInfo &FI, +                                          const MCSymbol *Fn) { +  std::string FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName()); +  const ThunkOrdinal ordinal = ThunkOrdinal::Standard; // Only supported kind. + +  OS.AddComment("Symbol subsection for " + Twine(FuncName)); +  MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); + +  // Emit S_THUNK32 +  MCSymbol *ThunkRecordBegin = MMI->getContext().createTempSymbol(), +           *ThunkRecordEnd   = MMI->getContext().createTempSymbol(); +  OS.AddComment("Record length"); +  OS.emitAbsoluteSymbolDiff(ThunkRecordEnd, ThunkRecordBegin, 2); +  OS.EmitLabel(ThunkRecordBegin); +  OS.AddComment("Record kind: S_THUNK32"); +  OS.EmitIntValue(unsigned(SymbolKind::S_THUNK32), 2); +  OS.AddComment("PtrParent"); +  OS.EmitIntValue(0, 4); +  OS.AddComment("PtrEnd"); +  OS.EmitIntValue(0, 4); +  OS.AddComment("PtrNext"); +  OS.EmitIntValue(0, 4); +  OS.AddComment("Thunk section relative address"); +  OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); +  OS.AddComment("Thunk section index"); +  OS.EmitCOFFSectionIndex(Fn); +  OS.AddComment("Code size"); +  OS.emitAbsoluteSymbolDiff(FI.End, Fn, 2); +  OS.AddComment("Ordinal"); +  OS.EmitIntValue(unsigned(ordinal), 1); +  OS.AddComment("Function name"); +  emitNullTerminatedSymbolName(OS, FuncName); +  // Additional fields specific to the thunk ordinal would go here. +  OS.EmitLabel(ThunkRecordEnd); + +  // Local variables/inlined routines are purposely omitted here.  The point of +  // marking this as a thunk is so Visual Studio will NOT stop in this routine. 
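// Editor's note on the emitNullTerminatedSymbolName change earlier in this
// file: callers may now pass their record's true fixed-length prefix (the
// global-variable record near the end of this file passes 12) instead of the
// worst-case 0xF00 default. The clipping arithmetic as a standalone sketch:
static size_t maxNameBytesSketch(size_t FixedRecordLen) {
  const size_t MaxRecordLength = 0xFF00; // CodeView record size cap
  // Reserve one byte for the NUL terminator; whatever remains after the
  // fixed fields bounds the longest name that still fits in the record.
  return MaxRecordLength - FixedRecordLen - 1;
}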
+ +  // Emit S_PROC_ID_END +  const unsigned RecordLengthForSymbolEnd = 2; +  OS.AddComment("Record length"); +  OS.EmitIntValue(RecordLengthForSymbolEnd, 2); +  OS.AddComment("Record kind: S_PROC_ID_END"); +  OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2); + +  endCVSubsection(SymbolsEnd); +} +  void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,                                               FunctionInfo &FI) { -  // For each function there is a separate subsection -  // which holds the PC to file:line table. +  // For each function there is a separate subsection which holds the PC to +  // file:line table.    const MCSymbol *Fn = Asm->getSymbol(GV);    assert(Fn); @@ -836,6 +904,11 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,    assert(SP);    setCurrentSubprogram(SP); +  if (SP->isThunk()) { +    emitDebugInfoForThunk(GV, FI, Fn); +    return; +  } +    // If we have a display name, build the fully qualified name by walking the    // chain of scopes.    if (!SP->getName().empty()) @@ -898,6 +971,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,      OS.EmitLabel(ProcRecordEnd);      emitLocalVariableList(FI.Locals); +    emitLexicalBlockList(FI.ChildBlocks, FI);      // Emit inlined call site information. Only emit functions inlined directly      // into the parent function. We'll emit the other sites recursively as part @@ -1018,7 +1092,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable(      LocalVariable Var;      Var.DIVar = VI.Var;      Var.DefRanges.emplace_back(std::move(DefRange)); -    recordLocalVariable(std::move(Var), VI.Loc->getInlinedAt()); +    recordLocalVariable(std::move(Var), Scope);    }  } @@ -1100,7 +1174,7 @@ void CodeViewDebug::calculateRanges(        auto J = std::next(I);        const DIExpression *DIExpr = DVInst->getDebugExpression();        while (J != E && -             !fragmentsOverlap(DIExpr, J->first->getDebugExpression())) +             !DIExpr->fragmentsOverlap(J->first->getDebugExpression()))          ++J;        if (J != E)          End = getLabelBeforeInsn(J->first); @@ -1149,14 +1223,15 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {      Var.DIVar = DIVar;      calculateRanges(Var, Ranges); -    recordLocalVariable(std::move(Var), InlinedAt); +    recordLocalVariable(std::move(Var), Scope);    }  }  void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {    const Function &GV = MF->getFunction(); -  assert(FnDebugInfo.count(&GV) == false); -  CurFn = &FnDebugInfo[&GV]; +  auto Insertion = FnDebugInfo.insert({&GV, llvm::make_unique<FunctionInfo>()}); +  assert(Insertion.second && "function already has info"); +  CurFn = Insertion.first->second.get();    CurFn->FuncId = NextFuncId++;    CurFn->Begin = Asm->getFunctionBegin(); @@ -1261,6 +1336,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {      return lowerTypePointer(cast<DIDerivedType>(Ty));    case dwarf::DW_TAG_ptr_to_member_type:      return lowerTypeMemberPointer(cast<DIDerivedType>(Ty)); +  case dwarf::DW_TAG_restrict_type:    case dwarf::DW_TAG_const_type:    case dwarf::DW_TAG_volatile_type:    // TODO: add support for DW_TAG_atomic_type here @@ -1281,6 +1357,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {      return lowerTypeClass(cast<DICompositeType>(Ty));    case dwarf::DW_TAG_union_type:      return lowerTypeUnion(cast<DICompositeType>(Ty)); +  case dwarf::DW_TAG_unspecified_type: +    return TypeIndex::None();    
default:      // Use the null type index.      return TypeIndex(); @@ -1308,7 +1386,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {    DITypeRef ElementTypeRef = Ty->getBaseType();    TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef);    // IndexType is size_t, which depends on the bitness of the target. -  TypeIndex IndexType = Asm->TM.getPointerSize() == 8 +  TypeIndex IndexType = getPointerSizeInBytes() == 8                              ? TypeIndex(SimpleTypeKind::UInt64Quad)                              : TypeIndex(SimpleTypeKind::UInt32Long); @@ -1323,7 +1401,9 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {      const DISubrange *Subrange = cast<DISubrange>(Element);      assert(Subrange->getLowerBound() == 0 &&             "codeview doesn't support subranges with lower bounds"); -    int64_t Count = Subrange->getCount(); +    int64_t Count = -1; +    if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>()) +      Count = CI->getSExtValue();      // Forward declarations of arrays without a size and VLAs use a count of -1.      // Emit a count of zero in these cases to match what MSVC does for arrays @@ -1441,12 +1521,13 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {    return TypeIndex(STK);  } -TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) { +TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty, +                                          PointerOptions PO) {    TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType()); -  // Pointers to simple types can use SimpleTypeMode, rather than having a -  // dedicated pointer type record. -  if (PointeeTI.isSimple() && +  // Pointers to simple types without any options can use SimpleTypeMode, rather +  // than having a dedicated pointer type record. +  if (PointeeTI.isSimple() && PO == PointerOptions::None &&        PointeeTI.getSimpleMode() == SimpleTypeMode::Direct &&        Ty->getTag() == dwarf::DW_TAG_pointer_type) {      SimpleTypeMode Mode = Ty->getSizeInBits() == 64 @@ -1470,10 +1551,7 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) {      PM = PointerMode::RValueReference;      break;    } -  // FIXME: MSVC folds qualifiers into PointerOptions in the context of a method -  // 'this' pointer, but not normal contexts. Figure out what we're supposed to -  // do. -  PointerOptions PO = PointerOptions::None; +    PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);    return TypeTable.writeLeafType(PR);  } @@ -1511,16 +1589,17 @@ translatePtrToMemberRep(unsigned SizeInBytes, bool IsPMF, unsigned Flags) {    llvm_unreachable("invalid ptr to member representation");  } -TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) { +TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty, +                                                PointerOptions PO) {    assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type);    TypeIndex ClassTI = getTypeIndex(Ty->getClassType());    TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType()); -  PointerKind PK = Asm->TM.getPointerSize() == 8 ? PointerKind::Near64 -                                                 : PointerKind::Near32; +  PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64 +                                                : PointerKind::Near32;    bool IsPMF = isa<DISubroutineType>(Ty->getBaseType());    PointerMode PM = IsPMF ? 
PointerMode::PointerToMemberFunction
                          : PointerMode::PointerToDataMember;
-  PointerOptions PO = PointerOptions::None; // FIXME
+
   assert(Ty->getSizeInBits() / 8 <= 0xff && "pointer size too big");
   uint8_t SizeInBytes = Ty->getSizeInBits() / 8;
   MemberPointerInfo MPI(
@@ -1545,6 +1624,7 @@ static CallingConvention dwarfCCToCodeView(unsigned DwarfCC) {
 
 TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
   ModifierOptions Mods = ModifierOptions::None;
+  PointerOptions PO = PointerOptions::None;
   bool IsModifier = true;
   const DIType *BaseTy = Ty;
   while (IsModifier && BaseTy) {
@@ -1552,9 +1632,16 @@
     switch (BaseTy->getTag()) {
     case dwarf::DW_TAG_const_type:
       Mods |= ModifierOptions::Const;
+      PO |= PointerOptions::Const;
       break;
     case dwarf::DW_TAG_volatile_type:
       Mods |= ModifierOptions::Volatile;
+      PO |= PointerOptions::Volatile;
+      break;
+    case dwarf::DW_TAG_restrict_type:
+      // Only pointer types can be marked with __restrict. There is no known
+      // flag for __restrict in LF_MODIFIER records.
+      PO |= PointerOptions::Restrict;
       break;
     default:
       IsModifier = false;
@@ -1563,7 +1650,31 @@
     if (IsModifier)
       BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve();
   }
+
+  // Check if the inner type will use an LF_POINTER record. If so, the
+  // qualifiers will go in the LF_POINTER record. This comes up for types like
+  // 'int *const' and 'int *__restrict', not the more common cases like 'const
+  // char *'.
+  if (BaseTy) {
+    switch (BaseTy->getTag()) {
+    case dwarf::DW_TAG_pointer_type:
+    case dwarf::DW_TAG_reference_type:
+    case dwarf::DW_TAG_rvalue_reference_type:
+      return lowerTypePointer(cast<DIDerivedType>(BaseTy), PO);
+    case dwarf::DW_TAG_ptr_to_member_type:
+      return lowerTypeMemberPointer(cast<DIDerivedType>(BaseTy), PO);
+    default:
+      break;
+    }
+  }
+
   TypeIndex ModifiedTI = getTypeIndex(BaseTy);
+
+  // Return the base type index if there aren't any modifiers. For example, the
+  // metadata could contain restrict wrappers around non-pointer types.
+  if (Mods == ModifierOptions::None)
+    return ModifiedTI;
+
   ModifierRecord MR(ModifiedTI, Mods);
   return TypeTable.writeLeafType(MR);
 }
@@ -1573,6 +1684,11 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
   for (DITypeRef ArgTypeRef : Ty->getTypeArray())
     ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
 
+  // MSVC uses type none for variadic arguments.
+  if (ReturnAndArgTypeIndices.size() > 1 &&
+      ReturnAndArgTypeIndices.back() == TypeIndex::Void()) {
+    ReturnAndArgTypeIndices.back() = TypeIndex::None();
+  }
   TypeIndex ReturnTypeIndex = TypeIndex::Void();
   ArrayRef<TypeIndex> ArgTypeIndices = None;
   if (!ReturnAndArgTypeIndices.empty()) {
@@ -1602,6 +1718,11 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
   for (DITypeRef ArgTypeRef : Ty->getTypeArray())
     ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
 
+  // MSVC uses type none for variadic arguments.
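// Editor's illustration of the qualifier routing added to lowerTypeModifier
// above - which CodeView record ends up carrying each qualifier (these
// declarations are invented examples; __restrict is the compiler-extension
// spelling):
const char *PtrToConst = nullptr;    // qualifies the pointee:
                                     //   LF_POINTER -> LF_MODIFIER(Const) -> char
char *const ConstPtr = nullptr;      // qualifies the pointer itself:
                                     //   LF_POINTER with PointerOptions::Const
int *__restrict RestrictPtr = nullptr; // no LF_MODIFIER flag exists for
                                       // restrict; only PointerOptions::Restrict
                                       // can express it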
+  if (ReturnAndArgTypeIndices.size() > 1 && +      ReturnAndArgTypeIndices.back() == TypeIndex::Void()) { +    ReturnAndArgTypeIndices.back() = TypeIndex::None(); +  }    TypeIndex ReturnTypeIndex = TypeIndex::Void();    ArrayRef<TypeIndex> ArgTypeIndices = None;    if (!ReturnAndArgTypeIndices.empty()) { @@ -1716,6 +1837,26 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {    return CO;  } +void CodeViewDebug::addUDTSrcLine(const DIType *Ty, TypeIndex TI) { +  switch (Ty->getTag()) { +  case dwarf::DW_TAG_class_type: +  case dwarf::DW_TAG_structure_type: +  case dwarf::DW_TAG_union_type: +  case dwarf::DW_TAG_enumeration_type: +    break; +  default: +    return; +  } + +  if (const auto *File = Ty->getFile()) { +    StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File)); +    TypeIndex SIDI = TypeTable.writeLeafType(SIDR); + +    UdtSourceLineRecord USLR(TI, SIDI, Ty->getLine()); +    TypeTable.writeLeafType(USLR); +  } +} +  TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {    ClassOptions CO = getCommonClassOptions(Ty);    TypeIndex FTI; @@ -1744,7 +1885,11 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {    EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(),                  getTypeIndex(Ty->getBaseType())); -  return TypeTable.writeLeafType(ER); +  TypeIndex EnumTI = TypeTable.writeLeafType(ER); + +  addUDTSrcLine(Ty, EnumTI); + +  return EnumTI;  }  //===----------------------------------------------------------------------===// @@ -1793,12 +1938,33 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info,      Info.Members.push_back({DDTy, 0});      return;    } -  // An unnamed member must represent a nested struct or union. Add all the -  // indirect fields to the current record. + +  // An unnamed member may represent a nested struct or union. Attempt to +  // interpret the unnamed member as a DICompositeType possibly wrapped in +  // qualifier types. Add all the indirect fields to the current record if that +  // succeeds, and drop the member if that fails.    assert((DDTy->getOffsetInBits() % 8) == 0 && "Unnamed bitfield member!");    uint64_t Offset = DDTy->getOffsetInBits();    const DIType *Ty = DDTy->getBaseType().resolve(); -  const DICompositeType *DCTy = cast<DICompositeType>(Ty); +  bool FullyResolved = false; +  while (!FullyResolved) { +    switch (Ty->getTag()) { +    case dwarf::DW_TAG_const_type: +    case dwarf::DW_TAG_volatile_type: +      // FIXME: we should apply the qualifier types to the indirect fields +      // rather than dropping them. +      Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve(); +      break; +    default: +      FullyResolved = true; +      break; +    } +  } + +  const DICompositeType *DCTy = dyn_cast<DICompositeType>(Ty); +  if (!DCTy) +    return; +    ClassInfo NestedInfo = collectClassInfo(DCTy);    for (const ClassInfo::MemberInfo &IndirectField : NestedInfo.Members)      Info.Members.push_back( @@ -1838,7 +2004,28 @@ ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) {    return Info;  } +static bool shouldAlwaysEmitCompleteClassType(const DICompositeType *Ty) { +  // This routine is used by lowerTypeClass and lowerTypeUnion to determine +  // if a complete type should be emitted instead of a forward reference. 
+  return Ty->getName().empty() && Ty->getIdentifier().empty() && +      !Ty->isForwardDecl(); +} +  TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) { +  // Emit the complete type for unnamed structs.  C++ classes with methods +  // which have a circular reference back to the class type are expected to +  // be named by the front-end and should not be "unnamed".  C unnamed +  // structs should not have circular references. +  if (shouldAlwaysEmitCompleteClassType(Ty)) { +    // If this unnamed complete type is already in the process of being defined +    // then the description of the type is malformed and cannot be emitted +    // into CodeView correctly so report a fatal error. +    auto I = CompleteTypeIndices.find(Ty); +    if (I != CompleteTypeIndices.end() && I->second == TypeIndex()) +      report_fatal_error("cannot debug circular reference to unnamed type"); +    return getCompleteTypeIndex(Ty); +  } +    // First, construct the forward decl.  Don't look into Ty to compute the    // forward decl options, since it might not be available in all TUs.    TypeRecordKind Kind = getRecordKind(Ty); @@ -1875,13 +2062,7 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {                   SizeInBytes, FullName, Ty->getIdentifier());    TypeIndex ClassTI = TypeTable.writeLeafType(CR); -  if (const auto *File = Ty->getFile()) { -    StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File)); -    TypeIndex SIDI = TypeTable.writeLeafType(SIDR); - -    UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine()); -    TypeTable.writeLeafType(USLR); -  } +  addUDTSrcLine(Ty, ClassTI);    addToUDTs(Ty); @@ -1889,6 +2070,10 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {  }  TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) { +  // Emit the complete type for unnamed unions. +  if (shouldAlwaysEmitCompleteClassType(Ty)) +    return getCompleteTypeIndex(Ty); +    ClassOptions CO =        ClassOptions::ForwardReference | getCommonClassOptions(Ty);    std::string FullName = getFullyQualifiedName(Ty); @@ -1917,11 +2102,7 @@ TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) {                   Ty->getIdentifier());    TypeIndex UnionTI = TypeTable.writeLeafType(UR); -  StringIdRecord SIR(TypeIndex(0x0), getFullFilepath(Ty->getFile())); -  TypeIndex SIRI = TypeTable.writeLeafType(SIR); - -  UdtSourceLineRecord USLR(UnionTI, SIRI, Ty->getLine()); -  TypeTable.writeLeafType(USLR); +  addUDTSrcLine(Ty, UnionTI);    addToUDTs(Ty); @@ -1943,8 +2124,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {    for (const DIDerivedType *I : Info.Inheritance) {      if (I->getFlags() & DINode::FlagVirtual) {        // Virtual base. -      // FIXME: Emit VBPtrOffset when the frontend provides it. -      unsigned VBPtrOffset = 0; +      unsigned VBPtrOffset = I->getVBPtrOffset();        // FIXME: Despite the accessor name, the offset is really in bytes.        
unsigned VBTableIndex = I->getOffsetInBits() / 4;        auto RecordKind = (I->getFlags() & DINode::FlagIndirectVirtualBase) == DINode::FlagIndirectVirtualBase @@ -1956,6 +2136,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {            VBTableIndex);        ContinuationBuilder.writeMemberType(VBCR); +      MemberCount++;      } else {        assert(I->getOffsetInBits() % 8 == 0 &&               "bases must be on byte boundaries"); @@ -1963,6 +2144,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {                            getTypeIndex(I->getBaseType()),                            I->getOffsetInBits() / 8);        ContinuationBuilder.writeMemberType(BCR); +      MemberCount++;      }    } @@ -2121,9 +2303,7 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {      return getTypeIndex(Ty);    } -  // Check if we've already translated the complete record type.  Lowering a -  // complete type should never trigger lowering another complete type, so we -  // can reuse the hash table lookup result. +  // Check if we've already translated the complete record type.    const auto *CTy = cast<DICompositeType>(Ty);    auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});    if (!InsertResult.second) @@ -2134,13 +2314,16 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {    // Make sure the forward declaration is emitted first. It's unclear if this    // is necessary, but MSVC does it, and we should follow suit until we can show    // otherwise. -  TypeIndex FwdDeclTI = getTypeIndex(CTy); +  // We only emit a forward declaration for named types. +  if (!CTy->getName().empty() || !CTy->getIdentifier().empty()) { +    TypeIndex FwdDeclTI = getTypeIndex(CTy); -  // Just use the forward decl if we don't have complete type info. This might -  // happen if the frontend is using modules and expects the complete definition -  // to be emitted elsewhere. -  if (CTy->isForwardDecl()) -    return FwdDeclTI; +    // Just use the forward decl if we don't have complete type info. This +    // might happen if the frontend is using modules and expects the complete +    // definition to be emitted elsewhere. +    if (CTy->isForwardDecl()) +      return FwdDeclTI; +  }    TypeIndex TI;    switch (CTy->getTag()) { @@ -2155,7 +2338,11 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {      llvm_unreachable("not a record");    } -  InsertResult.first->second = TI; +  // Update the type index associated with this CompositeType.  This cannot +  // use the 'InsertResult' iterator above because it is potentially +  // invalidated by map insertions which can occur while lowering the class +  // type above. 
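// Editor's illustration of the hazard described above (generic DenseMap
// behavior, not specific to this file): insertion may grow and rehash the
// table, invalidating previously obtained iterators.
static void iteratorInvalidationSketch() {
  llvm::DenseMap<int, int> Map;
  auto It = Map.insert({1, 0}).first; // iterator into Map's bucket storage
  Map.insert({2, 0});                 // may rehash; 'It' is now dangling
  (void)It;                           // must not be dereferenced anymore
  Map[1] = 42;                        // safe: re-lookup by key, as done below
}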
+  CompleteTypeIndices[CTy] = TI;    return TI;  } @@ -2179,10 +2366,10 @@ void CodeViewDebug::emitLocalVariableList(ArrayRef<LocalVariable> Locals) {    for (const LocalVariable &L : Locals)      if (L.DIVar->isParameter())        Params.push_back(&L); -  std::sort(Params.begin(), Params.end(), -            [](const LocalVariable *L, const LocalVariable *R) { -              return L->DIVar->getArg() < R->DIVar->getArg(); -            }); +  llvm::sort(Params.begin(), Params.end(), +             [](const LocalVariable *L, const LocalVariable *R) { +               return L->DIVar->getArg() < R->DIVar->getArg(); +             });    for (const LocalVariable *L : Params)      emitLocalVariable(*L); @@ -2272,15 +2459,150 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {    }  } +void CodeViewDebug::emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks, +                                         const FunctionInfo& FI) { +  for (LexicalBlock *Block : Blocks) +    emitLexicalBlock(*Block, FI); +} + +/// Emit an S_BLOCK32 and S_END record pair delimiting the contents of a +/// lexical block scope. +void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block, +                                     const FunctionInfo& FI) { +  MCSymbol *RecordBegin = MMI->getContext().createTempSymbol(), +           *RecordEnd   = MMI->getContext().createTempSymbol(); + +  // Lexical block symbol record. +  OS.AddComment("Record length"); +  OS.emitAbsoluteSymbolDiff(RecordEnd, RecordBegin, 2);   // Record Length +  OS.EmitLabel(RecordBegin); +  OS.AddComment("Record kind: S_BLOCK32"); +  OS.EmitIntValue(SymbolKind::S_BLOCK32, 2);              // Record Kind +  OS.AddComment("PtrParent"); +  OS.EmitIntValue(0, 4);                                  // PtrParent +  OS.AddComment("PtrEnd"); +  OS.EmitIntValue(0, 4);                                  // PtrEnd +  OS.AddComment("Code size"); +  OS.emitAbsoluteSymbolDiff(Block.End, Block.Begin, 4);   // Code Size +  OS.AddComment("Function section relative address"); +  OS.EmitCOFFSecRel32(Block.Begin, /*Offset=*/0);         // Func Offset +  OS.AddComment("Function section index"); +  OS.EmitCOFFSectionIndex(FI.Begin);                      // Func Symbol +  OS.AddComment("Lexical block name"); +  emitNullTerminatedSymbolName(OS, Block.Name);           // Name +  OS.EmitLabel(RecordEnd); + +  // Emit variables local to this lexical block. +  emitLocalVariableList(Block.Locals); + +  // Emit lexical blocks contained within this block. +  emitLexicalBlockList(Block.Children, FI); + +  // Close the lexical block scope. +  OS.AddComment("Record length"); +  OS.EmitIntValue(2, 2);                                  // Record Length +  OS.AddComment("Record kind: S_END"); +  OS.EmitIntValue(SymbolKind::S_END, 2);                  // Record Kind +} + +/// Convenience routine for collecting lexical block information for a list +/// of lexical scopes. +void CodeViewDebug::collectLexicalBlockInfo( +        SmallVectorImpl<LexicalScope *> &Scopes, +        SmallVectorImpl<LexicalBlock *> &Blocks, +        SmallVectorImpl<LocalVariable> &Locals) { +  for (LexicalScope *Scope : Scopes) +    collectLexicalBlockInfo(*Scope, Blocks, Locals); +} + +/// Populate the lexical blocks and local variable lists of the parent with +/// information about the specified lexical scope. 
+void CodeViewDebug::collectLexicalBlockInfo(
+    LexicalScope &Scope,
+    SmallVectorImpl<LexicalBlock *> &ParentBlocks,
+    SmallVectorImpl<LocalVariable> &ParentLocals) {
+  if (Scope.isAbstractScope())
+    return;
+
+  auto LocalsIter = ScopeVariables.find(&Scope);
+  if (LocalsIter == ScopeVariables.end()) {
+    // This scope does not contain variables and can be eliminated.
+    collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
+    return;
+  }
+  SmallVectorImpl<LocalVariable> &Locals = LocalsIter->second;
+
+  const DILexicalBlock *DILB = dyn_cast<DILexicalBlock>(Scope.getScopeNode());
+  if (!DILB) {
+    // This scope is not a lexical block and can be eliminated, but keep any
+    // local variables it contains.
+    ParentLocals.append(Locals.begin(), Locals.end());
+    collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
+    return;
+  }
+
+  const SmallVectorImpl<InsnRange> &Ranges = Scope.getRanges();
+  if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second)) {
+    // This lexical block scope has too many address ranges to represent in the
+    // current CodeView format or does not have a valid address range.
+    // Eliminate this lexical scope and promote any locals it contains to the
+    // parent scope.
+    //
+    // For lexical scopes with multiple address ranges you may be tempted to
+    // construct a single range covering every instruction where the block is
+    // live and everything in between.  Unfortunately, Visual Studio only
+    // displays variables from the first matching lexical block scope.  If the
+    // first lexical block contains exception handling code or cold code which
+    // is moved to the bottom of the routine creating a single range covering
+    // nearly the entire routine, then it will hide all other lexical blocks
+    // and the variables they contain.
+    //
+    ParentLocals.append(Locals.begin(), Locals.end());
+    collectLexicalBlockInfo(Scope.getChildren(), ParentBlocks, ParentLocals);
+    return;
+  }
+
+  // Create a new CodeView lexical block for this lexical scope.  If we've
+  // seen this DILexicalBlock before then the scope tree is malformed and
+  // we can handle this gracefully by not processing it a second time.
+  auto BlockInsertion = CurFn->LexicalBlocks.insert({DILB, LexicalBlock()});
+  if (!BlockInsertion.second)
+    return;
+
+  // Create a lexical block containing the local variables and collect the
+  // lexical block information for the children.
+  const InsnRange &Range = Ranges.front();
+  assert(Range.first && Range.second);
+  LexicalBlock &Block = BlockInsertion.first->second;
+  Block.Begin = getLabelBeforeInsn(Range.first);
+  Block.End = getLabelAfterInsn(Range.second);
+  assert(Block.Begin && "missing label for scope begin");
+  assert(Block.End && "missing label for scope end");
+  Block.Name = DILB->getName();
+  Block.Locals = std::move(Locals);
+  ParentBlocks.push_back(&Block);
+  collectLexicalBlockInfo(Scope.getChildren(), Block.Children, Block.Locals);
+}
+
 void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
   const Function &GV = MF->getFunction();
   assert(FnDebugInfo.count(&GV));
-  assert(CurFn == &FnDebugInfo[&GV]);
+  assert(CurFn == FnDebugInfo[&GV].get());
 
   collectVariableInfo(GV.getSubprogram());
 
+  // Build the lexical block structure to emit for this routine.
+  if (LexicalScope *CFS = LScopes.getCurrentFunctionScope()) +    collectLexicalBlockInfo(*CFS, CurFn->ChildBlocks, CurFn->Locals); + +  // Clear the scope and variable information from the map which will not be +  // valid after we have finished processing this routine.  This also prepares +  // the map for the subsequent routine. +  ScopeVariables.clear(); +    // Don't emit anything if we don't have any line tables. -  if (!CurFn->HaveLineInfo) { +  // Thunks are compiler-generated and probably won't have source correlation. +  if (!CurFn->HaveLineInfo && !GV.getSubprogram()->isThunk()) {      FnDebugInfo.erase(&GV);      CurFn = nullptr;      return; @@ -2296,8 +2618,8 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {  void CodeViewDebug::beginInstruction(const MachineInstr *MI) {    DebugHandlerBase::beginInstruction(MI); -  // Ignore DBG_VALUE locations and function prologue. -  if (!Asm || !CurFn || MI->isDebugValue() || +  // Ignore DBG_VALUE and DBG_LABEL locations and function prologue. +  if (!Asm || !CurFn || MI->isDebugInstr() ||        MI->getFlag(MachineInstr::FrameSetup))      return; @@ -2306,7 +2628,7 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {    DebugLoc DL = MI->getDebugLoc();    if (!DL && MI->getParent() != PrevInstBB) {      for (const auto &NextMI : *MI->getParent()) { -      if (NextMI.isDebugValue()) +      if (NextMI.isDebugInstr())          continue;        DL = NextMI.getDebugLoc();        if (DL) @@ -2432,6 +2754,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,    // FIXME: Thread local data, etc    MCSymbol *DataBegin = MMI->getContext().createTempSymbol(),             *DataEnd = MMI->getContext().createTempSymbol(); +  const unsigned FixedLengthOfThisRecord = 12;    OS.AddComment("Record length");    OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2);    OS.EmitLabel(DataBegin); @@ -2459,6 +2782,6 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,    OS.AddComment("Segment");    OS.EmitCOFFSectionIndex(GVSym);    OS.AddComment("Name"); -  emitNullTerminatedSymbolName(OS, DIGV->getName()); +  emitNullTerminatedSymbolName(OS, DIGV->getName(), FixedLengthOfThisRecord);    OS.EmitLabel(DataEnd);  } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 69e93640d7ef..6a0da5f993d0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -48,7 +48,7 @@ class MCStreamer;  class MCSymbol;  class MachineFunction; -/// \brief Collects and handles line tables information in a CodeView format. +/// Collects and handles line tables information in a CodeView format.  class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {    MCStreamer &OS;    BumpPtrAllocator Allocator; @@ -107,9 +107,23 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {      unsigned SiteFuncId = 0;    }; +  // Combines information from DILexicalBlock and LexicalScope. +  struct LexicalBlock { +    SmallVector<LocalVariable, 1> Locals; +    SmallVector<LexicalBlock *, 1> Children; +    const MCSymbol *Begin; +    const MCSymbol *End; +    StringRef Name; +  }; +    // For each function, store a vector of labels to its instructions, as well as    // to the end of the function.    struct FunctionInfo { +    FunctionInfo() = default; + +    // Uncopyable. 
+    FunctionInfo(const FunctionInfo &FI) = delete;
+
     /// Map from inlined call site to inlined instructions and child inlined
     /// call sites. Listed in program order.
     std::unordered_map<const DILocation *, InlineSite> InlineSites;
@@ -119,6 +133,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
     SmallVector<LocalVariable, 1> Locals;
 
+    std::unordered_map<const DILexicalBlockBase*, LexicalBlock> LexicalBlocks;
+
+    // Lexical blocks containing local variables.
+    SmallVector<LexicalBlock *, 1> ChildBlocks;
+
     std::vector<std::pair<MCSymbol *, MDNode *>> Annotations;
 
     const MCSymbol *Begin = nullptr;
@@ -129,6 +148,12 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   };
   FunctionInfo *CurFn = nullptr;
 
+  // Map used to separate variables according to the lexical scope they belong
+  // in.  This is populated by recordLocalVariable() before
+  // collectLexicalBlockInfo() separates the variables between the FunctionInfo
+  // and LexicalBlocks.
+  DenseMap<const LexicalScope *, SmallVector<LocalVariable, 1>> ScopeVariables;
+
   /// The set of comdat .debug$S sections that we've seen so far. Each section
   /// must start with a magic version number that must only be emitted once.
   /// This set tracks which sections we've already opened.
@@ -159,7 +184,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
   /// Remember some debug info about each function. Keep it in a stable order to
   /// emit at the end of the TU.
-  MapVector<const Function *, FunctionInfo> FnDebugInfo;
+  MapVector<const Function *, std::unique_ptr<FunctionInfo>> FnDebugInfo;
 
   /// Map from full file path to .cv_file id. Full paths are built from DIFiles
   /// and are stored in FileToFilepathMap;
@@ -200,7 +225,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   using FileToFilepathMapTy = std::map<const DIFile *, std::string>;
   FileToFilepathMapTy FileToFilepathMap;
 
-  StringRef getFullFilepath(const DIFile *S);
+  StringRef getFullFilepath(const DIFile *File);
 
   unsigned maybeRecordFile(const DIFile *F);
 
@@ -214,7 +239,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   }
 
   /// Emit the magic version number at the start of a CodeView type or symbol
-  /// section. Appears at the front of every .debug$S or .debug$T section.
+  /// section. Appears at the front of every .debug$S or .debug$T or .debug$P
+  /// section.
   void emitCodeViewMagicVersion();
 
   void emitTypeInformation();
@@ -225,6 +251,10 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
   void emitInlineeLinesSubsection();
 
+  void emitDebugInfoForThunk(const Function *GV,
+                             FunctionInfo &FI,
+                             const MCSymbol *Fn);
+
   void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI);
 
   void emitDebugInfoForGlobals();
@@ -253,9 +283,18 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
 
   void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &Processed);
 
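(Editorial aside: the declarations below implement a prune-and-promote walk
over the scope tree; a scope that cannot become its own CodeView block hands
its locals up to the nearest emitted ancestor. A self-contained toy of that
shape, with stand-in types rather than the patch's LexicalScope/LexicalBlock
API:

    #include <string>
    #include <utility>
    #include <vector>

    struct Scope {                  // stand-in for LexicalScope
      bool SingleRange;             // representable as one S_BLOCK32?
      std::vector<std::string> Locals;
      std::vector<Scope> Children;
    };
    struct Block {                  // stand-in for LexicalBlock
      std::vector<std::string> Locals;
      std::vector<Block> Children;
    };

    static void collect(const Scope &S, std::vector<Block> &ParentBlocks,
                        std::vector<std::string> &ParentLocals) {
      if (!S.SingleRange) {
        // Prune this scope, but promote its locals to the parent.
        ParentLocals.insert(ParentLocals.end(), S.Locals.begin(),
                            S.Locals.end());
        for (const Scope &C : S.Children)
          collect(C, ParentBlocks, ParentLocals);
        return;
      }
      Block B;
      B.Locals = S.Locals;
      for (const Scope &C : S.Children)
        collect(C, B.Children, B.Locals);
      ParentBlocks.push_back(std::move(B));
    }

Pruned scopes disappear from the emitted tree, while their children may still
become blocks of the surviving ancestor.)

+  // Construct the lexical block tree for a routine, pruning empty lexical
+  // scopes, and populate it with local variables.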
+  void collectLexicalBlockInfo(SmallVectorImpl<LexicalScope *> &Scopes, +                               SmallVectorImpl<LexicalBlock *> &Blocks, +                               SmallVectorImpl<LocalVariable> &Locals); +  void collectLexicalBlockInfo(LexicalScope &Scope, +                               SmallVectorImpl<LexicalBlock *> &ParentBlocks, +                               SmallVectorImpl<LocalVariable> &ParentLocals); +    /// Records information about a local variable in the appropriate scope. In    /// particular, locals from inlined code live inside the inlining site. -  void recordLocalVariable(LocalVariable &&Var, const DILocation *Loc); +  void recordLocalVariable(LocalVariable &&Var, const LexicalScope *LS);    /// Emits local variables in the appropriate order.    void emitLocalVariableList(ArrayRef<LocalVariable> Locals); @@ -263,6 +302,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {    /// Emits an S_LOCAL record and its associated defined ranges.    void emitLocalVariable(const LocalVariable &Var); +  /// Emits a sequence of lexical block scopes and their children. +  void emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks, +                            const FunctionInfo& FI); + +  /// Emit a lexical block scope and its children. +  void emitLexicalBlock(const LexicalBlock &Block, const FunctionInfo& FI); +    /// Translates the DIType to codeview if necessary and returns a type index    /// for it.    codeview::TypeIndex getTypeIndex(DITypeRef TypeRef, @@ -279,12 +325,18 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {    void addToUDTs(const DIType *Ty); +  void addUDTSrcLine(const DIType *Ty, codeview::TypeIndex TI); +    codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy);    codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty);    codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty);    codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty); -  codeview::TypeIndex lowerTypePointer(const DIDerivedType *Ty); -  codeview::TypeIndex lowerTypeMemberPointer(const DIDerivedType *Ty); +  codeview::TypeIndex lowerTypePointer( +      const DIDerivedType *Ty, +      codeview::PointerOptions PO = codeview::PointerOptions::None); +  codeview::TypeIndex lowerTypeMemberPointer( +      const DIDerivedType *Ty, +      codeview::PointerOptions PO = codeview::PointerOptions::None);    codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty);    codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty);    codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty); @@ -327,21 +379,21 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {    unsigned getPointerSizeInBytes();  protected: -  /// \brief Gather pre-function debug information. +  /// Gather pre-function debug information.    void beginFunctionImpl(const MachineFunction *MF) override; -  /// \brief Gather post-function debug information. +  /// Gather post-function debug information.    void endFunctionImpl(const MachineFunction *) override;  public: -  CodeViewDebug(AsmPrinter *Asm); +  CodeViewDebug(AsmPrinter *AP);    void setSymbolSize(const MCSymbol *, uint64_t) override {} -  /// \brief Emit the COFF section that holds the line table information. +  /// Emit the COFF section that holds the line table information.    void endModule() override; -  /// \brief Process beginning of an instruction. +  /// Process beginning of an instruction.    
void beginInstruction(const MachineInstr *MI) override;  }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index b3148db30cd6..570424a79c81 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -17,6 +17,7 @@  #include "DwarfUnit.h"  #include "llvm/ADT/Twine.h"  #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/MC/MCAsmInfo.h"  #include "llvm/MC/MCContext.h" @@ -86,8 +87,9 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {      // easily, which helps track down where it came from.      if (!dwarf::isValidFormForVersion(AttrData.getForm(),                                        AP->getDwarfVersion())) { -      DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm()) -                   << " for DWARF version " << AP->getDwarfVersion() << "\n"); +      LLVM_DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm()) +                        << " for DWARF version " << AP->getDwarfVersion() +                        << "\n");        llvm_unreachable("Invalid form for specified DWARF version");      }  #endif @@ -388,6 +390,7 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {    case dwarf::DW_FORM_data2:    case dwarf::DW_FORM_strx2:    case dwarf::DW_FORM_addrx2: +  case dwarf::DW_FORM_strx3:    case dwarf::DW_FORM_strp:    case dwarf::DW_FORM_ref4:    case dwarf::DW_FORM_data4: @@ -410,6 +413,7 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {    case dwarf::DW_FORM_GNU_str_index:    case dwarf::DW_FORM_GNU_addr_index:    case dwarf::DW_FORM_ref_udata: +  case dwarf::DW_FORM_strx:    case dwarf::DW_FORM_udata:      Asm->EmitULEB128(Integer);      return; @@ -423,58 +427,23 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {  /// SizeOf - Determine size of integer value in bytes.  
///  unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { +  dwarf::FormParams Params = {0, 0, dwarf::DWARF32}; +  if (AP) +    Params = {AP->getDwarfVersion(), uint8_t(AP->getPointerSize()), +              AP->OutStreamer->getContext().getDwarfFormat()}; + +  if (Optional<uint8_t> FixedSize = dwarf::getFixedFormByteSize(Form, Params)) +    return *FixedSize; +    switch (Form) { -  case dwarf::DW_FORM_implicit_const: -  case dwarf::DW_FORM_flag_present: -    return 0; -  case dwarf::DW_FORM_flag: -  case dwarf::DW_FORM_ref1: -  case dwarf::DW_FORM_data1: -  case dwarf::DW_FORM_strx1: -  case dwarf::DW_FORM_addrx1: -    return sizeof(int8_t); -  case dwarf::DW_FORM_ref2: -  case dwarf::DW_FORM_data2: -  case dwarf::DW_FORM_strx2: -  case dwarf::DW_FORM_addrx2: -    return sizeof(int16_t); -  case dwarf::DW_FORM_ref4: -  case dwarf::DW_FORM_data4: -  case dwarf::DW_FORM_ref_sup4: -  case dwarf::DW_FORM_strx4: -  case dwarf::DW_FORM_addrx4: -    return sizeof(int32_t); -  case dwarf::DW_FORM_ref8: -  case dwarf::DW_FORM_ref_sig8: -  case dwarf::DW_FORM_data8: -  case dwarf::DW_FORM_ref_sup8: -    return sizeof(int64_t); -  case dwarf::DW_FORM_ref_addr: -    if (AP->getDwarfVersion() == 2) -      return AP->getPointerSize(); -    LLVM_FALLTHROUGH; -  case dwarf::DW_FORM_strp: -  case dwarf::DW_FORM_GNU_ref_alt: -  case dwarf::DW_FORM_GNU_strp_alt: -  case dwarf::DW_FORM_line_strp: -  case dwarf::DW_FORM_sec_offset: -  case dwarf::DW_FORM_strp_sup: -    switch (AP->OutStreamer->getContext().getDwarfFormat()) { -    case dwarf::DWARF32: -      return 4; -    case dwarf::DWARF64: -      return 8; -    } -    llvm_unreachable("Invalid DWARF format");    case dwarf::DW_FORM_GNU_str_index:    case dwarf::DW_FORM_GNU_addr_index:    case dwarf::DW_FORM_ref_udata: +  case dwarf::DW_FORM_strx:    case dwarf::DW_FORM_udata:      return getULEB128Size(Integer);    case dwarf::DW_FORM_sdata:      return getSLEB128Size(Integer); -  case dwarf::DW_FORM_addr: -    return AP->getPointerSize();    default: llvm_unreachable("DIE Value form not supported yet");    }  } @@ -564,44 +533,46 @@ void DIEDelta::print(raw_ostream &O) const {  /// EmitValue - Emit string value.  ///  void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { -  assert( -      (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) && -      "Expected valid string form"); -    // Index of string in symbol table. -  if (Form == dwarf::DW_FORM_GNU_str_index) { +  switch (Form) { +  case dwarf::DW_FORM_GNU_str_index: +  case dwarf::DW_FORM_strx: +  case dwarf::DW_FORM_strx1: +  case dwarf::DW_FORM_strx2: +  case dwarf::DW_FORM_strx3: +  case dwarf::DW_FORM_strx4:      DIEInteger(S.getIndex()).EmitValue(AP, Form);      return; -  } - -  // Relocatable symbol. -  assert(Form == dwarf::DW_FORM_strp); -  if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) { -    DIELabel(S.getSymbol()).EmitValue(AP, Form); +  case dwarf::DW_FORM_strp: +    if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) +      DIELabel(S.getSymbol()).EmitValue(AP, Form); +    else +      DIEInteger(S.getOffset()).EmitValue(AP, Form);      return; +  default: +    llvm_unreachable("Expected valid string form");    } - -  // Offset into symbol table. -  DIEInteger(S.getOffset()).EmitValue(AP, Form);  }  /// SizeOf - Determine size of delta value in bytes.  
///  unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { -  assert( -      (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) && -      "Expected valid string form"); -    // Index of string in symbol table. -  if (Form == dwarf::DW_FORM_GNU_str_index) +  switch (Form) { +  case dwarf::DW_FORM_GNU_str_index: +  case dwarf::DW_FORM_strx: +  case dwarf::DW_FORM_strx1: +  case dwarf::DW_FORM_strx2: +  case dwarf::DW_FORM_strx3: +  case dwarf::DW_FORM_strx4:      return DIEInteger(S.getIndex()).SizeOf(AP, Form); - -  // Relocatable symbol. -  if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) -    return DIELabel(S.getSymbol()).SizeOf(AP, Form); - -  // Offset into symbol table. -  return DIEInteger(S.getOffset()).SizeOf(AP, Form); +  case dwarf::DW_FORM_strp: +    if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) +      return DIELabel(S.getSymbol()).SizeOf(AP, Form); +    return DIEInteger(S.getOffset()).SizeOf(AP, Form); +  default: +    llvm_unreachable("Expected valid string form"); +  }  }  LLVM_DUMP_METHOD @@ -615,8 +586,8 @@ void DIEString::print(raw_ostream &O) const {  void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {    if (Form == dwarf::DW_FORM_string) {      for (char ch : S) -      AP->EmitInt8(ch); -    AP->EmitInt8(0); +      AP->emitInt8(ch); +    AP->emitInt8(0);      return;    }    llvm_unreachable("Expected valid string form"); @@ -722,9 +693,9 @@ unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {  void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {    switch (Form) {    default: llvm_unreachable("Improper form for block"); -  case dwarf::DW_FORM_block1: Asm->EmitInt8(Size);    break; -  case dwarf::DW_FORM_block2: Asm->EmitInt16(Size);   break; -  case dwarf::DW_FORM_block4: Asm->EmitInt32(Size);   break; +  case dwarf::DW_FORM_block1: Asm->emitInt8(Size);    break; +  case dwarf::DW_FORM_block2: Asm->emitInt16(Size);   break; +  case dwarf::DW_FORM_block4: Asm->emitInt32(Size);   break;    case dwarf::DW_FORM_block:    case dwarf::DW_FORM_exprloc:      Asm->EmitULEB128(Size); break; @@ -773,10 +744,11 @@ unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {  void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {    switch (Form) {    default: llvm_unreachable("Improper form for block"); -  case dwarf::DW_FORM_block1: Asm->EmitInt8(Size);    break; -  case dwarf::DW_FORM_block2: Asm->EmitInt16(Size);   break; -  case dwarf::DW_FORM_block4: Asm->EmitInt32(Size);   break; +  case dwarf::DW_FORM_block1: Asm->emitInt8(Size);    break; +  case dwarf::DW_FORM_block2: Asm->emitInt16(Size);   break; +  case dwarf::DW_FORM_block4: Asm->emitInt32(Size);   break;    case dwarf::DW_FORM_block:  Asm->EmitULEB128(Size); break; +  case dwarf::DW_FORM_string: break;    case dwarf::DW_FORM_data16: break;    } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 15ade3c96dfe..b8f1202494d7 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -28,7 +28,7 @@ using namespace llvm;  #define DEBUG_TYPE "dwarfdebug" -/// \brief Grabs the string in whichever attribute is passed in and returns +/// Grabs the string in whichever attribute is passed in and returns  /// a reference to it.  
 static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
   // Iterate through all the attributes until we find the one we're
@@ -40,10 +40,10 @@ static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
   return StringRef("");
 }
 
-/// \brief Adds the string in \p Str to the hash. This also hashes
+/// Adds the string in \p Str to the hash. This also hashes
 /// a trailing NULL with the string.
 void DIEHash::addString(StringRef Str) {
-  DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
+  LLVM_DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
   Hash.update(Str);
   Hash.update(makeArrayRef((uint8_t)'\0'));
 }
@@ -51,9 +51,9 @@ void DIEHash::addString(StringRef Str) {
 // FIXME: The LEB128 routines are copied and only slightly modified out of
 // LEB128.h.
 
-/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128.
+/// Adds the unsigned in \p Value to the hash encoded as a ULEB128.
 void DIEHash::addULEB128(uint64_t Value) {
-  DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
+  LLVM_DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
   do {
     uint8_t Byte = Value & 0x7f;
     Value >>= 7;
@@ -64,7 +64,7 @@ void DIEHash::addULEB128(uint64_t Value) {
 }
 
 void DIEHash::addSLEB128(int64_t Value) {
-  DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
+  LLVM_DEBUG(dbgs() << "Adding SLEB128 " << Value << " to hash.\n");
   bool More;
   do {
     uint8_t Byte = Value & 0x7f;
@@ -77,10 +77,10 @@ void DIEHash::addSLEB128(int64_t Value) {
   } while (More);
 }
 
-/// \brief Including \p Parent adds the context of Parent to the hash..
+/// Including \p Parent adds the context of Parent to the hash.
 void DIEHash::addParentContext(const DIE &Parent) {
-  DEBUG(dbgs() << "Adding parent context to hash...\n");
+  LLVM_DEBUG(dbgs() << "Adding parent context to hash...\n");
 
   // [7.27.2] For each surrounding type or namespace beginning with the
   // outermost such construct...
@@ -108,7 +108,7 @@ void DIEHash::addParentContext(const DIE &Parent) {
 
     // ... Then the name, taken from the DW_AT_name attribute.
     StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
-    DEBUG(dbgs() << "... adding context: " << Name << "\n");
+    LLVM_DEBUG(dbgs() << "... adding context: " << Name << "\n");
     if (!Name.empty())
       addString(Name);
   }
@@ -118,9 +118,9 @@ void DIEHash::addParentContext(const DIE &Parent) {
 
 void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) {
   for (const auto &V : Die.values()) {
-    DEBUG(dbgs() << "Attribute: "
-                 << dwarf::AttributeString(V.getAttribute())
-                 << " added.\n");
+    LLVM_DEBUG(dbgs() << "Attribute: "
+                      << dwarf::AttributeString(V.getAttribute())
+                      << " added.\n");
     switch (V.getAttribute()) {
 #define HANDLE_DIE_HASH_ATTR(NAME)                                             \
   case dwarf::NAME:                                                            \
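(Editorial aside: the addULEB128/addSLEB128 loops above are the standard
LEB128 encodings, seven payload bits per byte with the high bit set while
more bytes follow. For reference, a standalone unsigned encoder producing
the same byte stream that gets hashed; this is illustrative, not the patch's
code:

    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f;  // low seven payload bits
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80;               // continuation bit: more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      return Out;                     // e.g. 624485 -> {0xE5, 0x8E, 0x26}
    }

The signed variant additionally stops once the remaining bits are all sign
extension.)

diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index 29337ae38a99..dae517ab2c29 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -23,7 +23,7 @@ namespace llvm {
 class AsmPrinter;
 class CompileUnit;
 
-/// \brief An object containing the capability of hashing and adding hash
+/// An object containing the capability of hashing and adding hash
 /// attributes onto a DIE.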
class DIEHash {    // Collection of all attributes used in hashing a particular DIE. @@ -35,66 +35,66 @@ class DIEHash {  public:    DIEHash(AsmPrinter *A = nullptr) : AP(A) {} -  /// \brief Computes the CU signature. +  /// Computes the CU signature.    uint64_t computeCUSignature(StringRef DWOName, const DIE &Die); -  /// \brief Computes the type signature. +  /// Computes the type signature.    uint64_t computeTypeSignature(const DIE &Die);    // Helper routines to process parts of a DIE.  private: -  /// \brief Adds the parent context of \param Die to the hash. -  void addParentContext(const DIE &Die); +  /// Adds the parent context of \param Parent to the hash. +  void addParentContext(const DIE &Parent); -  /// \brief Adds the attributes of \param Die to the hash. +  /// Adds the attributes of \param Die to the hash.    void addAttributes(const DIE &Die); -  /// \brief Computes the full DWARF4 7.27 hash of the DIE. +  /// Computes the full DWARF4 7.27 hash of the DIE.    void computeHash(const DIE &Die);    // Routines that add DIEValues to the hash.  public: -  /// \brief Adds \param Value to the hash. +  /// Adds \param Value to the hash.    void update(uint8_t Value) { Hash.update(Value); } -  /// \brief Encodes and adds \param Value to the hash as a ULEB128. +  /// Encodes and adds \param Value to the hash as a ULEB128.    void addULEB128(uint64_t Value); -  /// \brief Encodes and adds \param Value to the hash as a SLEB128. +  /// Encodes and adds \param Value to the hash as a SLEB128.    void addSLEB128(int64_t Value);  private: -  /// \brief Adds \param Str to the hash and includes a NULL byte. +  /// Adds \param Str to the hash and includes a NULL byte.    void addString(StringRef Str); -  /// \brief Collects the attributes of DIE \param Die into the \param Attrs +  /// Collects the attributes of DIE \param Die into the \param Attrs    /// structure.    void collectAttributes(const DIE &Die, DIEAttrs &Attrs); -  /// \brief Hashes the attributes in \param Attrs in order. +  /// Hashes the attributes in \param Attrs in order.    void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag); -  /// \brief Hashes the data in a block like DIEValue, e.g. DW_FORM_block or +  /// Hashes the data in a block like DIEValue, e.g. DW_FORM_block or    /// DW_FORM_exprloc.    void hashBlockData(const DIE::const_value_range &Values); -  /// \brief Hashes the contents pointed to in the .debug_loc section. +  /// Hashes the contents pointed to in the .debug_loc section.    void hashLocList(const DIELocList &LocList); -  /// \brief Hashes an individual attribute. +  /// Hashes an individual attribute.    void hashAttribute(const DIEValue &Value, dwarf::Tag Tag); -  /// \brief Hashes an attribute that refers to another DIE. +  /// Hashes an attribute that refers to another DIE.    void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,                      const DIE &Entry); -  /// \brief Hashes a reference to a named type in such a way that is +  /// Hashes a reference to a named type in such a way that is    /// independent of whether that type is described by a declaration or a    /// definition.    void hashShallowTypeReference(dwarf::Attribute Attribute, const DIE &Entry,                                  StringRef Name); -  /// \brief Hashes a reference to a previously referenced type DIE. +  /// Hashes a reference to a previously referenced type DIE.    
void hashRepeatedTypeReference(dwarf::Attribute Attribute,                                   unsigned DieNumber); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index 856758c8e4f6..25518a339c61 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -31,7 +31,7 @@ using namespace llvm;  #define DEBUG_TYPE "dwarfdebug" -// \brief If @MI is a DBG_VALUE with debug value described by a +// If @MI is a DBG_VALUE with debug value described by a  // defined register, returns the number of this register.  // In the other case, returns 0.  static unsigned isDescribedByReg(const MachineInstr &MI) { @@ -50,8 +50,8 @@ void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,    auto &Ranges = VarInstrRanges[Var];    if (!Ranges.empty() && Ranges.back().second == nullptr &&        Ranges.back().first->isIdenticalTo(MI)) { -    DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" -                 << "\t" << Ranges.back().first << "\t" << MI << "\n"); +    LLVM_DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" +                      << "\t" << Ranges.back().first << "\t" << MI << "\n");      return;    }    Ranges.push_back(std::make_pair(&MI, nullptr)); @@ -86,7 +86,7 @@ using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedVariable, 1>>;  } // end anonymous namespace -// \brief Claim that @Var is not described by @RegNo anymore. +// Claim that @Var is not described by @RegNo anymore.  static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,                                  InlinedVariable Var) {    const auto &I = RegVars.find(RegNo); @@ -100,7 +100,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,      RegVars.erase(I);  } -// \brief Claim that @Var is now described by @RegNo. +// Claim that @Var is now described by @RegNo.  static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,                                 InlinedVariable Var) {    assert(RegNo != 0U); @@ -109,7 +109,7 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,    VarSet.push_back(Var);  } -// \brief Terminate the location range for variables described by register at +// Terminate the location range for variables described by register at  // @I by inserting @ClobberingInstr to their history.  static void clobberRegisterUses(RegDescribedVarsMap &RegVars,                                  RegDescribedVarsMap::iterator I, @@ -122,7 +122,7 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars,    RegVars.erase(I);  } -// \brief Terminate the location range for variables described by register +// Terminate the location range for variables described by register  // @RegNo by inserting @ClobberingInstr to their history.  static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,                                  DbgValueHistoryMap &HistMap, @@ -133,7 +133,7 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,    clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr);  } -// \brief Returns the first instruction in @MBB which corresponds to +// Returns the first instruction in @MBB which corresponds to  // the function epilogue, or nullptr if @MBB doesn't contain an epilogue.  
static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {    auto LastMI = MBB.getLastNonDebugInstr(); @@ -155,7 +155,7 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {    return &*MBB.begin();  } -// \brief Collect registers that are modified in the function body (their +// Collect registers that are modified in the function body (their  // contents is changed outside of the prologue and epilogue).  static void collectChangingRegs(const MachineFunction *MF,                                  const TargetRegisterInfo *TRI, @@ -198,7 +198,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,    RegDescribedVarsMap RegVars;    for (const auto &MBB : *MF) {      for (const auto &MI : MBB) { -      if (!MI.isDebugValue()) { +      if (!MI.isDebugInstr()) {          // Not a DBG_VALUE instruction. It may clobber registers which describe          // some variables.          for (const MachineOperand &MO : MI.operands()) { @@ -234,6 +234,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,          continue;        } +      // Skip DBG_LABEL instructions. +      if (MI.isDebugLabel()) +        continue; +        assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");        // Use the base variable (without any DW_OP_piece expressions)        // as index into History. The full variables including the @@ -265,3 +269,33 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,      }    }  } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const { +  dbgs() << "DbgValueHistoryMap:\n"; +  for (const auto &VarRangePair : *this) { +    const InlinedVariable &Var = VarRangePair.first; +    const InstrRanges &Ranges = VarRangePair.second; + +    const DILocalVariable *LocalVar = Var.first; +    const DILocation *Location = Var.second; + +    dbgs() << " - " << LocalVar->getName() << " at "; + +    if (Location) +      dbgs() << Location->getFilename() << ":" << Location->getLine() << ":" +             << Location->getColumn(); +    else +      dbgs() << "<unknown location>"; + +    dbgs() << " --\n"; + +    for (const InstrRange &Range : Ranges) { +      dbgs() << "   Begin: " << *Range.first; +      if (Range.second) +        dbgs() << "   End  : " << *Range.second; +      dbgs() << "\n"; +    } +  } +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h index a7b0562e8102..a262cb38b175 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -52,6 +52,10 @@ public:    void clear() { VarInstrRanges.clear(); }    InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); }    InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +  LLVM_DUMP_METHOD void dump() const; +#endif  };  void calculateDbgValueHistory(const MachineFunction *MF, diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 2e5c22447936..82e14dc13cb1 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -25,6 +25,8 @@  using namespace llvm; +#define DEBUG_TYPE "dwarfdebug" +  Optional<DbgVariableLocation>  
DbgVariableLocation::extractFromMachineInstruction(      const MachineInstr &Instruction) { @@ -123,29 +125,6 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) {    return LabelsAfterInsn.lookup(MI);  } -int DebugHandlerBase::fragmentCmp(const DIExpression *P1, -                                  const DIExpression *P2) { -  auto Fragment1 = *P1->getFragmentInfo(); -  auto Fragment2 = *P2->getFragmentInfo(); -  unsigned l1 = Fragment1.OffsetInBits; -  unsigned l2 = Fragment2.OffsetInBits; -  unsigned r1 = l1 + Fragment1.SizeInBits; -  unsigned r2 = l2 + Fragment2.SizeInBits; -  if (r1 <= l2) -    return -1; -  else if (r2 <= l1) -    return 1; -  else -    return 0; -} - -bool DebugHandlerBase::fragmentsOverlap(const DIExpression *P1, -                                        const DIExpression *P2) { -  if (!P1->isFragment() || !P2->isFragment()) -    return true; -  return fragmentCmp(P1, P2) == 0; -} -  /// If this type is derived from a base type then return base type size.  uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {    DIType *Ty = TyRef.resolve(); @@ -213,6 +192,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {    assert(DbgValues.empty() && "DbgValues map wasn't cleaned!");    calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),                             DbgValues); +  LLVM_DEBUG(DbgValues.dump());    // Request labels for the full history.    for (const auto &I : DbgValues) { @@ -232,8 +212,8 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {            const DIExpression *Fragment = I->first->getDebugExpression();            if (std::all_of(Ranges.begin(), I,                            [&](DbgValueHistoryMap::InstrRange Pred) { -                            return !fragmentsOverlap( -                                Fragment, Pred.first->getDebugExpression()); +                            return !Fragment->fragmentsOverlap( +                                Pred.first->getDebugExpression());                            }))              LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();            else diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h index 245d70038de9..1ccefe32be75 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h @@ -122,14 +122,6 @@ public:    /// Return Label immediately following the instruction.    MCSymbol *getLabelAfterInsn(const MachineInstr *MI); -  /// Determine the relative position of the fragments described by P1 and P2. -  /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, 1 if P1 is -  /// entirely after P2. -  static int fragmentCmp(const DIExpression *P1, const DIExpression *P2); - -  /// Determine whether two variable fragments overlap. -  static bool fragmentsOverlap(const DIExpression *P1, const DIExpression *P2); -    /// If this type is derived from a base type then return base type size.    
static uint64_t getBaseTypeSize(const DITypeRef TyRef);  }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 3d6d8a76529c..ac49657b68fa 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -11,6 +11,7 @@  #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H  #include "DebugLocStream.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DebugInfo.h"  #include "llvm/MC/MCSymbol.h" @@ -20,7 +21,7 @@  namespace llvm {  class AsmPrinter; -/// \brief This struct describes location entries emitted in the .debug_loc +/// This struct describes location entries emitted in the .debug_loc  /// section.  class DebugLocEntry {    /// Begin and end symbols for the address range that this location is valid. @@ -28,7 +29,7 @@ class DebugLocEntry {    const MCSymbol *End;  public: -  /// \brief A single location or constant. +  /// A single location or constant.    struct Value {      Value(const DIExpression *Expr, int64_t i)          : Expression(Expr), EntryKind(E_Integer) { @@ -105,13 +106,13 @@ public:      Values.push_back(std::move(Val));    } -  /// \brief If this and Next are describing different pieces of the same +  /// If this and Next are describing different pieces of the same    /// variable, merge them by appending Next's values to the current    /// list of values.    /// Return true if the merge was successful.    bool MergeValues(const DebugLocEntry &Next); -  /// \brief Attempt to merge this DebugLocEntry with Next and return +  /// Attempt to merge this DebugLocEntry with Next and return    /// true if the merge was successful. Entries can be merged if they    /// share the same Loc/Constant and if Next immediately follows this    /// Entry. @@ -135,10 +136,10 @@ public:          }) && "value must be a piece");    } -  // \brief Sort the pieces by offset. +  // Sort the pieces by offset.    // Remove any duplicate entries by dropping all but the first.    void sortUniqueValues() { -    std::sort(Values.begin(), Values.end()); +    llvm::sort(Values.begin(), Values.end());      Values.erase(          std::unique(              Values.begin(), Values.end(), [](const Value &A, const Value &B) { @@ -147,12 +148,12 @@ public:          Values.end());    } -  /// \brief Lower this entry into a DWARF expression. +  /// Lower this entry into a DWARF expression.    void finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List,                  const DIBasicType *BT);  }; -/// \brief Compare two Values for equality. +/// Compare two Values for equality.  inline bool operator==(const DebugLocEntry::Value &A,                         const DebugLocEntry::Value &B) {    if (A.EntryKind != B.EntryKind) diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h index 0c551dfff9cc..8dcf5cbc1889 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -22,7 +22,7 @@ class DwarfCompileUnit;  class MachineInstr;  class MCSymbol; -/// \brief Byte stream of .debug_loc entries. +/// Byte stream of .debug_loc entries.  ///  /// Stores a unified stream of .debug_loc entries.  There's \a List for each  /// variable/inlined-at pair, and an \a Entry for each \a DebugLocEntry. 
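(Editorial aside: the fragmentCmp/fragmentsOverlap helpers removed from
DebugHandlerBase earlier in this patch, now provided by DIExpression itself,
are plain bit-interval arithmetic. An equivalent standalone rendering, with
a toy Fragment type standing in for DIExpression::FragmentInfo:

    struct Fragment { unsigned OffsetInBits, SizeInBits; };

    // -1 if A lies entirely before B, 1 if entirely after, 0 on overlap;
    // mirrors the interval test in the deleted fragmentCmp.
    int fragmentCmp(Fragment A, Fragment B) {
      if (A.OffsetInBits + A.SizeInBits <= B.OffsetInBits)
        return -1;
      if (B.OffsetInBits + B.SizeInBits <= A.OffsetInBits)
        return 1;
      return 0;
    }

    bool fragmentsOverlap(Fragment A, Fragment B) {
      return fragmentCmp(A, B) == 0;
    }

Two half-open bit ranges overlap exactly when neither ends at or before the
start of the other.)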
@@ -55,7 +55,7 @@ private:    SmallString<256> DWARFBytes;    SmallVector<std::string, 32> Comments; -  /// \brief Only verbose textual output needs comments.  This will be set to +  /// Only verbose textual output needs comments.  This will be set to    /// true for that case, and false otherwise.    bool GenerateComments; @@ -69,7 +69,7 @@ public:    class EntryBuilder;  private: -  /// \brief Start a new .debug_loc entry list. +  /// Start a new .debug_loc entry list.    ///    /// Start a new .debug_loc entry list.  Return the new list's index so it can    /// be retrieved later via \a getList(). @@ -89,7 +89,7 @@ private:    /// \return false iff the list is deleted.    bool finalizeList(AsmPrinter &Asm); -  /// \brief Start a new .debug_loc entry. +  /// Start a new .debug_loc entry.    ///    /// Until the next call, bytes added to the stream will be added to this    /// entry. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp deleted file mode 100644 index c21b3d3451ad..000000000000 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ /dev/null @@ -1,293 +0,0 @@ -//===- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables --------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing dwarf accelerator tables. -// -//===----------------------------------------------------------------------===// - -#include "DwarfAccelTable.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/DIE.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cassert> -#include <cstddef> -#include <cstdint> -#include <iterator> -#include <limits> -#include <vector> - -using namespace llvm; - -// The length of the header data is always going to be 4 + 4 + 4*NumAtoms. -DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) -    : Header(8 + (atomList.size() * 4)), HeaderData(atomList), -      Entries(Allocator) {} - -void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die, -                              char Flags) { -  assert(Data.empty() && "Already finalized!"); -  // If the string is in the list already then add this die to the list -  // otherwise add a new one. -  DataArray &DIEs = Entries[Name.getString()]; -  assert(!DIEs.Name || DIEs.Name == Name); -  DIEs.Name = Name; -  DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags)); -} - -void DwarfAccelTable::ComputeBucketCount() { -  // First get the number of unique hashes. -  std::vector<uint32_t> uniques(Data.size()); -  for (size_t i = 0, e = Data.size(); i < e; ++i) -    uniques[i] = Data[i]->HashValue; -  array_pod_sort(uniques.begin(), uniques.end()); -  std::vector<uint32_t>::iterator p = -      std::unique(uniques.begin(), uniques.end()); -  uint32_t num = std::distance(uniques.begin(), p); - -  // Then compute the bucket size, minimum of 1 bucket. -  if (num > 1024) -    Header.bucket_count = num / 4; -  else if (num > 16) -    Header.bucket_count = num / 2; -  else -    Header.bucket_count = num > 0 ? 
num : 1; - -  Header.hashes_count = num; -} - -// compareDIEs - comparison predicate that sorts DIEs by their offset. -static bool compareDIEs(const DwarfAccelTable::HashDataContents *A, -                        const DwarfAccelTable::HashDataContents *B) { -  return A->Die->getOffset() < B->Die->getOffset(); -} - -void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) { -  // Create the individual hash data outputs. -  Data.reserve(Entries.size()); -  for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end(); -       EI != EE; ++EI) { - -    // Unique the entries. -    std::stable_sort(EI->second.Values.begin(), EI->second.Values.end(), compareDIEs); -    EI->second.Values.erase( -        std::unique(EI->second.Values.begin(), EI->second.Values.end()), -        EI->second.Values.end()); - -    HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second); -    Data.push_back(Entry); -  } - -  // Figure out how many buckets we need, then compute the bucket -  // contents and the final ordering. We'll emit the hashes and offsets -  // by doing a walk during the emission phase. We add temporary -  // symbols to the data so that we can reference them during the offset -  // later, we'll emit them when we emit the data. -  ComputeBucketCount(); - -  // Compute bucket contents and final ordering. -  Buckets.resize(Header.bucket_count); -  for (size_t i = 0, e = Data.size(); i < e; ++i) { -    uint32_t bucket = Data[i]->HashValue % Header.bucket_count; -    Buckets[bucket].push_back(Data[i]); -    Data[i]->Sym = Asm->createTempSymbol(Prefix); -  } - -  // Sort the contents of the buckets by hash value so that hash -  // collisions end up together. Stable sort makes testing easier and -  // doesn't cost much more. -  for (size_t i = 0; i < Buckets.size(); ++i) -    std::stable_sort(Buckets[i].begin(), Buckets[i].end(), -                     [] (HashData *LHS, HashData *RHS) { -                       return LHS->HashValue < RHS->HashValue; -                     }); -} - -// Emits the header for the table via the AsmPrinter. -void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { -  Asm->OutStreamer->AddComment("Header Magic"); -  Asm->EmitInt32(Header.magic); -  Asm->OutStreamer->AddComment("Header Version"); -  Asm->EmitInt16(Header.version); -  Asm->OutStreamer->AddComment("Header Hash Function"); -  Asm->EmitInt16(Header.hash_function); -  Asm->OutStreamer->AddComment("Header Bucket Count"); -  Asm->EmitInt32(Header.bucket_count); -  Asm->OutStreamer->AddComment("Header Hash Count"); -  Asm->EmitInt32(Header.hashes_count); -  Asm->OutStreamer->AddComment("Header Data Length"); -  Asm->EmitInt32(Header.header_data_len); -  Asm->OutStreamer->AddComment("HeaderData Die Offset Base"); -  Asm->EmitInt32(HeaderData.die_offset_base); -  Asm->OutStreamer->AddComment("HeaderData Atom Count"); -  Asm->EmitInt32(HeaderData.Atoms.size()); -  for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { -    Atom A = HeaderData.Atoms[i]; -    Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.type)); -    Asm->EmitInt16(A.type); -    Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.form)); -    Asm->EmitInt16(A.form); -  } -} - -// Walk through and emit the buckets for the table. Each index is -// an offset into the list of hashes. 
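(Editorial aside: FinalizeTable above assigns each entry to bucket
HashValue % bucket_count and keeps colliding hashes adjacent. The reader-side
lookup over the emitted buckets/hashes/offsets layout then works as follows;
a toy sketch, not the LLVM implementation:

    #include <cstdint>
    #include <vector>

    struct AccelTable {
      std::vector<uint32_t> Buckets; // index into Hashes, UINT32_MAX if empty
      std::vector<uint32_t> Hashes;  // equal-modulo hashes are adjacent
    };

    // Returns the index of Hash in T.Hashes (the slot whose offset entry
    // should be read next), or -1 if the name is absent.
    int lookup(const AccelTable &T, uint32_t Hash) {
      uint32_t BucketCount = static_cast<uint32_t>(T.Buckets.size());
      uint32_t Idx = T.Buckets[Hash % BucketCount];
      if (Idx == UINT32_MAX)
        return -1;                   // empty bucket
      while (Idx < T.Hashes.size() &&
             T.Hashes[Idx] % BucketCount == Hash % BucketCount) {
        if (T.Hashes[Idx] == Hash)
          return static_cast<int>(Idx);
        ++Idx;
      }
      return -1;
    }

This matches the walk the file's header comment describes: hash the name,
take it modulo the bucket count, then scan successive hashes while they
still land in the same bucket.)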
-void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { -  unsigned index = 0; -  for (size_t i = 0, e = Buckets.size(); i < e; ++i) { -    Asm->OutStreamer->AddComment("Bucket " + Twine(i)); -    if (!Buckets[i].empty()) -      Asm->EmitInt32(index); -    else -      Asm->EmitInt32(std::numeric_limits<uint32_t>::max()); -    // Buckets point in the list of hashes, not to the data. Do not -    // increment the index multiple times in case of hash collisions. -    uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); -    for (auto *HD : Buckets[i]) { -      uint32_t HashValue = HD->HashValue; -      if (PrevHash != HashValue) -        ++index; -      PrevHash = HashValue; -    } -  } -} - -// Walk through the buckets and emit the individual hashes for each -// bucket. -void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { -  uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); -  for (size_t i = 0, e = Buckets.size(); i < e; ++i) { -    for (HashList::const_iterator HI = Buckets[i].begin(), -                                  HE = Buckets[i].end(); -         HI != HE; ++HI) { -      uint32_t HashValue = (*HI)->HashValue; -      if (PrevHash == HashValue) -        continue; -      Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(i)); -      Asm->EmitInt32(HashValue); -      PrevHash = HashValue; -    } -  } -} - -// Walk through the buckets and emit the individual offsets for each -// element in each bucket. This is done via a symbol subtraction from the -// beginning of the section. The non-section symbol will be output later -// when we emit the actual data. -void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) { -  uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); -  for (size_t i = 0, e = Buckets.size(); i < e; ++i) { -    for (HashList::const_iterator HI = Buckets[i].begin(), -                                  HE = Buckets[i].end(); -         HI != HE; ++HI) { -      uint32_t HashValue = (*HI)->HashValue; -      if (PrevHash == HashValue) -        continue; -      PrevHash = HashValue; -      Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i)); -      MCContext &Context = Asm->OutStreamer->getContext(); -      const MCExpr *Sub = MCBinaryExpr::createSub( -          MCSymbolRefExpr::create((*HI)->Sym, Context), -          MCSymbolRefExpr::create(SecBegin, Context), Context); -      Asm->OutStreamer->EmitValue(Sub, sizeof(uint32_t)); -    } -  } -} - -// Walk through the buckets and emit the full data for each element in -// the bucket. For the string case emit the dies and the various offsets. -// Terminate each HashData bucket with 0. -void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { -  for (size_t i = 0, e = Buckets.size(); i < e; ++i) { -    uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); -    for (HashList::const_iterator HI = Buckets[i].begin(), -                                  HE = Buckets[i].end(); -         HI != HE; ++HI) { -      // Terminate the previous entry if there is no hash collision -      // with the current one. -      if (PrevHash != std::numeric_limits<uint64_t>::max() && -          PrevHash != (*HI)->HashValue) -        Asm->EmitInt32(0); -      // Remember to emit the label for our offset. 
-      Asm->OutStreamer->EmitLabel((*HI)->Sym); -      Asm->OutStreamer->AddComment((*HI)->Str); -      Asm->emitDwarfStringOffset((*HI)->Data.Name); -      Asm->OutStreamer->AddComment("Num DIEs"); -      Asm->EmitInt32((*HI)->Data.Values.size()); -      for (HashDataContents *HD : (*HI)->Data.Values) { -        // Emit the DIE offset -        Asm->EmitInt32(HD->Die->getDebugSectionOffset()); -        // If we have multiple Atoms emit that info too. -        // FIXME: A bit of a hack, we either emit only one atom or all info. -        if (HeaderData.Atoms.size() > 1) { -          Asm->EmitInt16(HD->Die->getTag()); -          Asm->EmitInt8(HD->Flags); -        } -      } -      PrevHash = (*HI)->HashValue; -    } -    // Emit the final end marker for the bucket. -    if (!Buckets[i].empty()) -      Asm->EmitInt32(0); -  } -} - -// Emit the entire data structure to the output file. -void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin, -                           DwarfDebug *D) { -  // Emit the header. -  EmitHeader(Asm); - -  // Emit the buckets. -  EmitBuckets(Asm); - -  // Emit the hashes. -  EmitHashes(Asm); - -  // Emit the offsets. -  emitOffsets(Asm, SecBegin); - -  // Emit the hash data. -  EmitData(Asm, D); -} - -#ifndef NDEBUG -void DwarfAccelTable::print(raw_ostream &OS) { -  Header.print(OS); -  HeaderData.print(OS); - -  OS << "Entries: \n"; -  for (StringMap<DataArray>::const_iterator EI = Entries.begin(), -                                            EE = Entries.end(); -       EI != EE; ++EI) { -    OS << "Name: " << EI->getKeyData() << "\n"; -    for (HashDataContents *HD : EI->second.Values) -      HD->print(OS); -  } - -  OS << "Buckets and Hashes: \n"; -  for (size_t i = 0, e = Buckets.size(); i < e; ++i) -    for (HashList::const_iterator HI = Buckets[i].begin(), -                                  HE = Buckets[i].end(); -         HI != HE; ++HI) -      (*HI)->print(OS); - -  OS << "Data: \n"; -  for (std::vector<HashData *>::const_iterator DI = Data.begin(), -                                               DE = Data.end(); -       DI != DE; ++DI) -    (*DI)->print(OS); -} -#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h deleted file mode 100644 index f56199dc8e72..000000000000 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ /dev/null @@ -1,261 +0,0 @@ -//==- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables --*- C++ -*-==// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing dwarf accelerator tables. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H -#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/CodeGen/DIE.h" -#include "llvm/CodeGen/DwarfStringPoolEntry.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include <cstddef> -#include <cstdint> -#include <vector> - -// The dwarf accelerator tables are an indirect hash table optimized -// for null lookup rather than access to known data. They are output into -// an on-disk format that looks like this: -// -// .-------------. -// |  HEADER     | -// |-------------| -// |  BUCKETS    | -// |-------------| -// |  HASHES     | -// |-------------| -// |  OFFSETS    | -// |-------------| -// |  DATA       | -// `-------------' -// -// where the header contains a magic number, version, type of hash function, -// the number of buckets, total number of hashes, and room for a special -// struct of data and the length of that struct. -// -// The buckets contain an index (e.g. 6) into the hashes array. The hashes -// section contains all of the 32-bit hash values in contiguous memory, and -// the offsets contain the offset into the data area for the particular -// hash. -// -// For a lookup example, we could hash a function name and take it modulo the -// number of buckets giving us our bucket. From there we take the bucket value -// as an index into the hashes table and look at each successive hash as long -// as the hash value is still the same modulo result (bucket value) as earlier. -// If we have a match we look at that same entry in the offsets table and -// grab the offset in the data for our final match. - -namespace llvm { - -class AsmPrinter; -class DwarfDebug; - -class DwarfAccelTable { -  // Helper function to compute the number of buckets needed based on -  // the number of unique hashes. -  void ComputeBucketCount(); - -  struct TableHeader { -    uint32_t magic = MagicHash; // 'HASH' magic value to allow endian detection -    uint16_t version = 1;       // Version number. -    uint16_t hash_function = dwarf::DW_hash_function_djb; -    // The hash function enumeration that was used. -    uint32_t bucket_count = 0;  // The number of buckets in this hash table. -    uint32_t hashes_count = 0;  // The total number of unique hash values -                                // and hash data offsets in this table. -    uint32_t header_data_len;   // The bytes to skip to get to the hash -                                // indexes (buckets) for correct alignment. -    // Also written to disk is the implementation specific header data. - -    static const uint32_t MagicHash = 0x48415348; - -    TableHeader(uint32_t data_len) : header_data_len(data_len) {} - -#ifndef NDEBUG -    void print(raw_ostream &OS) { -      OS << "Magic: " << format("0x%x", magic) << "\n" -         << "Version: " << version << "\n" -         << "Hash Function: " << hash_function << "\n" -         << "Bucket Count: " << bucket_count << "\n" -         << "Header Data Length: " << header_data_len << "\n"; -    } - -    void dump() { print(dbgs()); } -#endif -  }; - -public: -  // The HeaderData describes the form of each set of data. 
In general this -  // is as a list of atoms (atom_count) where each atom contains a type -  // (AtomType type) of data, and an encoding form (form). In the case of -  // data that is referenced via DW_FORM_ref_* the die_offset_base is -  // used to describe the offset for all forms in the list of atoms. -  // This also serves as a public interface of sorts. -  // When written to disk this will have the form: -  // -  // uint32_t die_offset_base -  // uint32_t atom_count -  // atom_count Atoms - -  // Make these public so that they can be used as a general interface to -  // the class. -  struct Atom { -    uint16_t type; // enum AtomType -    uint16_t form; // DWARF DW_FORM_ defines - -    constexpr Atom(uint16_t type, uint16_t form) : type(type), form(form) {} - -#ifndef NDEBUG -    void print(raw_ostream &OS) { -      OS << "Type: " << dwarf::AtomTypeString(type) << "\n" -         << "Form: " << dwarf::FormEncodingString(form) << "\n"; -    } - -    void dump() { print(dbgs()); } -#endif -  }; - -private: -  struct TableHeaderData { -    uint32_t die_offset_base; -    SmallVector<Atom, 3> Atoms; - -    TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0) -        : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {} - -#ifndef NDEBUG -    void print(raw_ostream &OS) { -      OS << "die_offset_base: " << die_offset_base << "\n"; -      for (size_t i = 0; i < Atoms.size(); i++) -        Atoms[i].print(OS); -    } - -    void dump() { print(dbgs()); } -#endif -  }; - -  // The data itself consists of a str_offset, a count of the DIEs in the -  // hash and the offsets to the DIEs themselves. -  // On disk each data section is ended with a 0 KeyType as the end of the -  // hash chain. -  // On output this looks like: -  // uint32_t str_offset -  // uint32_t hash_data_count -  // HashData[hash_data_count] -public: -  struct HashDataContents { -    const DIE *Die;   // Offsets -    char Flags; // Specific flags to output - -    HashDataContents(const DIE *D, char Flags) : Die(D), Flags(Flags) {} - -#ifndef NDEBUG -    void print(raw_ostream &OS) const { -      OS << "  Offset: " << Die->getOffset() << "\n" -         << "  Tag: " << dwarf::TagString(Die->getTag()) << "\n" -         << "  Flags: " << Flags << "\n"; -    } -#endif -  }; - -private: -  // String Data -  struct DataArray { -    DwarfStringPoolEntryRef Name; -    std::vector<HashDataContents *> Values; -  }; - -  friend struct HashData; - -  struct HashData { -    StringRef Str; -    uint32_t HashValue; -    MCSymbol *Sym; -    DwarfAccelTable::DataArray &Data; // offsets - -    HashData(StringRef S, DwarfAccelTable::DataArray &Data) -        : Str(S), Data(Data) { -      HashValue = dwarf::djbHash(S); -    } - -#ifndef NDEBUG -    void print(raw_ostream &OS) { -      OS << "Name: " << Str << "\n"; -      OS << "  Hash Value: " << format("0x%x", HashValue) << "\n"; -      OS << "  Symbol: "; -      if (Sym) -        OS << *Sym; -      else -        OS << "<none>"; -      OS << "\n"; -      for (HashDataContents *C : Data.Values) { -        OS << "  Offset: " << C->Die->getOffset() << "\n"; -        OS << "  Tag: " << dwarf::TagString(C->Die->getTag()) << "\n"; -        OS << "  Flags: " << C->Flags << "\n"; -      } -    } - -    void dump() { print(dbgs()); } -#endif -  }; - -  // Internal Functions -  void EmitHeader(AsmPrinter *); -  void EmitBuckets(AsmPrinter *); -  void EmitHashes(AsmPrinter *); -  void emitOffsets(AsmPrinter *, const MCSymbol *); -  void EmitData(AsmPrinter *, DwarfDebug *D); - -  // 
Allocator for HashData and HashDataContents. -  BumpPtrAllocator Allocator; - -  // Output Variables -  TableHeader Header; -  TableHeaderData HeaderData; -  std::vector<HashData *> Data; - -  using StringEntries = StringMap<DataArray, BumpPtrAllocator &>; - -  StringEntries Entries; - -  // Buckets/Hashes/Offsets -  using HashList = std::vector<HashData *>; -  using BucketList = std::vector<HashList>; -  BucketList Buckets; -  HashList Hashes; - -  // Public Implementation -public: -  DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); -  DwarfAccelTable(const DwarfAccelTable &) = delete; -  DwarfAccelTable &operator=(const DwarfAccelTable &) = delete; - -  void AddName(DwarfStringPoolEntryRef Name, const DIE *Die, char Flags = 0); -  void FinalizeTable(AsmPrinter *, StringRef); -  void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *); -#ifndef NDEBUG -  void print(raw_ostream &OS); -  void dump() { print(dbgs()); } -#endif -}; - -} // end namespace llvm - -#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index cbb4c48b4d88..1990456cc555 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -17,7 +17,6 @@  #include "llvm/CodeGen/AsmPrinter.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/Mangler.h"  #include "llvm/IR/Module.h" @@ -30,6 +29,7 @@  #include "llvm/MC/MachineLocation.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include "llvm/Target/TargetOptions.h"  using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index c8cd8eb8ffd3..32271a0ef24a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -28,7 +28,6 @@  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/TargetFrameLowering.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/IR/DataLayout.h" @@ -40,6 +39,7 @@  #include "llvm/MC/MCSymbol.h"  #include "llvm/MC/MachineLocation.h"  #include "llvm/Support/Casting.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include "llvm/Target/TargetMachine.h"  #include "llvm/Target/TargetOptions.h"  #include <algorithm> @@ -94,16 +94,18 @@ void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,                   DIEInteger(0));  } -unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, -                                               StringRef DirName) { +unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {    // If we print assembly, we can't separate .file entries according to    // compile units. Thus all files will belong to the default compile unit.    // FIXME: add a better feature test than hasRawTextSupport. Even better,    // extend .file to support this. +  unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 
0 : getUniqueID();
+  if (!File)
+    return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", nullptr, None, CUID);
   return Asm->OutStreamer->EmitDwarfFileDirective(
-      0, DirName, FileName,
-      Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID());
+      0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File),
+      File->getSource(), CUID);
 }
 
 DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
@@ -190,10 +192,13 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
       DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
     }
 
+    if (Expr)
+      DwarfExpr->addFragmentOffset(Expr);
+
     if (Global) {
       const MCSymbol *Sym = Asm->getSymbol(Global);
       if (Global->isThreadLocal()) {
-        if (Asm->TM.Options.EmulatedTLS) {
+        if (Asm->TM.useEmulatedTLS()) {
           // TODO: add debug info for emulated thread local mode.
         } else {
           // FIXME: Make this work with -gsplit-dwarf.
@@ -225,10 +230,13 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
         addOpAddress(*Loc, Sym);
       }
     }
-    if (Expr) {
-      DwarfExpr->addFragmentOffset(Expr);
-      DwarfExpr->addExpression(Expr);
-    }
+    // Global variables attached to symbols are memory locations.
+    // It would be better if this were unconditional, but malformed input that
+    // mixes non-fragments and fragments for the same variable is too expensive
+    // to detect in the verifier.
+    if (DwarfExpr->isUnknownLocation())
+      DwarfExpr->setMemoryLocationKind();
+    DwarfExpr->addExpression(Expr);
   }
   if (Loc)
     addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
@@ -241,7 +249,8 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
     // If the linkage name is different than the name, go ahead and output
     // that as well into the name table.
-    if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName())
+    if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName() &&
+        DD->useAllLinkageNames())
       DD->addAccelName(GV->getLinkageName(), *VariableDIE);
   }
 
@@ -267,15 +276,20 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
 
 void DwarfCompileUnit::initStmtList() {
   // Define start line table label for each Compile Unit.
-  MCSymbol *LineTableStartSym =
-      Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID());
+  MCSymbol *LineTableStartSym;
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+  if (DD->useSectionsAsReferences()) {
+    LineTableStartSym = TLOF.getDwarfLineSection()->getBeginSymbol();
+  } else {
+    LineTableStartSym =
+        Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID());
+  }
 
   // DW_AT_stmt_list is an offset of line number information for this
   // compile unit in the debug_line section. For split dwarf this is
   // left in the skeleton CU and so not included.
   // The line table entries are not always emitted in assembly, so it
   // is not okay to use line_table_start here.
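   // (When sections-as-references is in effect, the label chosen above is the
   // beginning of .debug_line itself, so all units share a single symbol.)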
-  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();    StmtListValue =        addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym,                        TLOF.getDwarfLineSection()->getBeginSymbol()); @@ -313,10 +327,16 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {    // Only include DW_AT_frame_base in full debug info    if (!includeMinimalInlineScopes()) { -    const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); -    MachineLocation Location(RI->getFrameRegister(*Asm->MF)); -    if (RI->isPhysicalRegister(Location.getReg())) -      addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); +    if (Asm->MF->getTarget().getTargetTriple().isNVPTX()) { +      DIELoc *Loc = new (DIEValueAllocator) DIELoc; +      addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_call_frame_cfa); +      addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc); +    } else { +      const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); +      MachineLocation Location(RI->getFrameRegister(*Asm->MF)); +      if (RI->isPhysicalRegister(Location.getReg())) +        addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); +    }    }    // Add name to the name table, we do this here because we're guaranteed @@ -385,21 +405,28 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,                                           SmallVector<RangeSpan, 2> Range) {    const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); -  // Emit offset in .debug_range as a relocatable label. emitDIE will handle -  // emitting it appropriately. +  // Emit the offset into .debug_ranges or .debug_rnglists as a relocatable +  // label. emitDIE() will handle emitting it appropriately.    const MCSymbol *RangeSectionSym = -      TLOF.getDwarfRangesSection()->getBeginSymbol(); +      DD->getDwarfVersion() >= 5 +          ? TLOF.getDwarfRnglistsSection()->getBeginSymbol() +          : TLOF.getDwarfRangesSection()->getBeginSymbol();    RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range));    // Under fission, ranges are specified by constant offsets relative to the    // CU's DW_AT_GNU_ranges_base. -  if (isDwoUnit()) -    addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), -                    RangeSectionSym); -  else +  // FIXME: For DWARF v5, do not generate the DW_AT_ranges attribute under +  // fission until we support the forms using the .debug_addr section +  // (DW_RLE_startx_endx etc.). +  if (isDwoUnit()) { +    if (DD->getDwarfVersion() < 5) +      addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), +                      RangeSectionSym); +  } else {      addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),                      RangeSectionSym); +  }    // Add the range list to the set of ranges to be emitted.    (Skeleton ? 
Skeleton : this)->CURangeLists.push_back(std::move(List)); @@ -407,9 +434,10 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,  void DwarfCompileUnit::attachRangesOrLowHighPC(      DIE &Die, SmallVector<RangeSpan, 2> Ranges) { -  if (Ranges.size() == 1) { -    const auto &single = Ranges.front(); -    attachLowHighPC(Die, single.getStart(), single.getEnd()); +  if (Ranges.size() == 1 || !DD->useRangesSection()) { +    const RangeSpan &Front = Ranges.front(); +    const RangeSpan &Back = Ranges.back(); +    attachLowHighPC(Die, Front.getStart(), Back.getEnd());    } else      addScopeRangeList(Die, std::move(Ranges));  } @@ -443,7 +471,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {    // Add the call site information to the DIE.    const DILocation *IA = Scope->getInlinedAt();    addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, -          getOrCreateSourceID(IA->getFilename(), IA->getDirectory())); +          getOrCreateSourceID(IA->getFile()));    addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());    if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)      addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None, @@ -482,6 +510,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,                                                  bool Abstract) {    // Define variable debug information entry.    auto VariableDie = DIE::get(DIEValueAllocator, DV.getTag()); +  insertDIE(DV.getVariable(), VariableDie);    if (Abstract) {      applyVariableAttributes(DV, *VariableDie); @@ -547,8 +576,11 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,      Ops.append(Expr->elements_begin(), Expr->elements_end());      DIExpressionCursor Cursor(Ops);      DwarfExpr.setMemoryLocationKind(); -    DwarfExpr.addMachineRegExpression( -        *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg); +    if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol()) +      addOpAddress(*Loc, FrameSymbol); +    else +      DwarfExpr.addMachineRegExpression( +          *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);      DwarfExpr.addExpression(std::move(Cursor));    }    addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); @@ -565,13 +597,95 @@ DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,    return Var;  } +/// Return all DIVariables that appear in count: expressions. +static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) { +  SmallVector<const DIVariable *, 2> Result; +  auto *Array = dyn_cast<DICompositeType>(Var->getType()); +  if (!Array || Array->getTag() != dwarf::DW_TAG_array_type) +    return Result; +  for (auto *El : Array->getElements()) { +    if (auto *Subrange = dyn_cast<DISubrange>(El)) { +      auto Count = Subrange->getCount(); +      if (auto *Dependency = Count.dyn_cast<DIVariable *>()) +        Result.push_back(Dependency); +    } +  } +  return Result; +} + +/// Sort local variables so that variables appearing inside of helper +/// expressions come first. +static SmallVector<DbgVariable *, 8> +sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { +  SmallVector<DbgVariable *, 8> Result; +  SmallVector<PointerIntPair<DbgVariable *, 1>, 8> WorkList; +  // Map back from a DIVariable to its containing DbgVariable. +  SmallDenseMap<const DILocalVariable *, DbgVariable *> DbgVar; +  // Set of DbgVariables in Result. +  SmallDenseSet<DbgVariable *, 8> Visited; +  // For cycle detection. 
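+  // (Holds the variables currently on the DFS stack; a failed insertion
+  // below means a count-expression dependency cycle.)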
+  SmallDenseSet<DbgVariable *, 8> Visiting; + +  // Initialize the worklist and the DIVariable lookup table. +  for (auto Var : reverse(Input)) { +    DbgVar.insert({Var->getVariable(), Var}); +    WorkList.push_back({Var, 0}); +  } + +  // Perform a stable topological sort by doing a DFS. +  while (!WorkList.empty()) { +    auto Item = WorkList.back(); +    DbgVariable *Var = Item.getPointer(); +    bool visitedAllDependencies = Item.getInt(); +    WorkList.pop_back(); + +    // Dependency is in a different lexical scope or a global. +    if (!Var) +      continue; + +    // Already handled. +    if (Visited.count(Var)) +      continue; + +    // Add to Result if all dependencies are visited. +    if (visitedAllDependencies) { +      Visited.insert(Var); +      Result.push_back(Var); +      continue; +    } + +    // Detect cycles. +    auto Res = Visiting.insert(Var); +    if (!Res.second) { +      assert(false && "dependency cycle in local variables"); +      return Result; +    } + +    // Push dependencies and this node onto the worklist, so that this node is +    // visited again after all of its dependencies are handled. +    WorkList.push_back({Var, 1}); +    for (auto *Dependency : dependencies(Var)) { +      auto Dep = dyn_cast_or_null<const DILocalVariable>(Dependency); +      WorkList.push_back({DbgVar[Dep], 0}); +    } +  } +  return Result; +} +  DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,                                                SmallVectorImpl<DIE *> &Children,                                                bool *HasNonScopeChildren) {    assert(Children.empty());    DIE *ObjectPointer = nullptr; -  for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope)) +  // Emit function arguments (order is significant). +  auto Vars = DU->getScopeVariables().lookup(Scope); +  for (auto &DV : Vars.Args) +    Children.push_back(constructVariableDIE(*DV.second, *Scope, ObjectPointer)); + +  // Emit local variables. +  auto Locals = sortLocalVars(Vars.Locals); +  for (DbgVariable *DV : Locals)      Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));    // Skip imported directives in gmlt-like data. @@ -687,9 +801,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(    else      EntityDie = getDIE(Entity);    assert(EntityDie); -  auto *File = Module->getFile(); -  addSourceLine(*IMDie, Module->getLine(), File ? File->getFilename() : "", -                File ? File->getDirectory() : ""); +  addSourceLine(*IMDie, Module->getLine(), Module->getFile());    addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);    StringRef Name = Module->getName();    if (!Name.empty()) @@ -750,7 +862,7 @@ void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var,  void DwarfCompileUnit::emitHeader(bool UseOffsets) {    // Don't bother labeling the .dwo unit, as its offset isn't used. -  if (!Skeleton) { +  if (!Skeleton && !DD->useSectionsAsReferences()) {      LabelBegin = Asm->createTempSymbol("cu_begin");      Asm->OutStreamer->EmitLabel(LabelBegin);    } @@ -759,6 +871,8 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) {                                  : DD->useSplitDwarf() ? 
dwarf::DW_UT_skeleton                                                        : dwarf::DW_UT_compile;    DwarfUnit::emitCommonHeader(UseOffsets, UT); +  if (DD->getDwarfVersion() >= 5 && UT != dwarf::DW_UT_compile) +    Asm->emitInt64(getDWOId());  }  bool DwarfCompileUnit::hasDwarfPubSections() const { @@ -767,7 +881,8 @@ bool DwarfCompileUnit::hasDwarfPubSections() const {    if (CUNode->getGnuPubnames())      return true; -  return DD->tuneForGDB() && !includeMinimalInlineScopes(); +  return DD->tuneForGDB() && DD->usePubSections() && +         !includeMinimalInlineScopes();  }  /// addGlobalName - Add a new global name to the compile unit. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 68482eb7e358..51e1558fe4a3 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -83,7 +83,10 @@ class DwarfCompileUnit final : public DwarfUnit {    DenseMap<const MDNode *, DIE *> AbstractSPDies;    DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; -  /// \brief Construct a DIE for the given DbgVariable without initializing the +  /// DWO ID for correlating skeleton and split units. +  uint64_t DWOId = 0; + +  /// Construct a DIE for the given DbgVariable without initializing the    /// DbgVariable's DIE reference.    DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract); @@ -141,7 +144,7 @@ public:    DwarfCompileUnit &getCU() override { return *this; } -  unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; +  unsigned getOrCreateSourceID(const DIFile *File) override;    void addImportedEntity(const DIImportedEntity* IE) {      DIScope *Scope = IE->getScope(); @@ -159,7 +162,7 @@ public:    void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End); -  /// \brief Find DIE for the given subprogram and attach appropriate +  /// Find DIE for the given subprogram and attach appropriate    /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global    /// variables in this scope then create and insert DIEs for these    /// variables. @@ -168,7 +171,7 @@ public:    void constructScopeDIE(LexicalScope *Scope,                           SmallVectorImpl<DIE *> &FinalChildren); -  /// \brief A helper function to construct a RangeSpanList for a given +  /// A helper function to construct a RangeSpanList for a given    /// lexical scope.    void addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range); @@ -177,11 +180,11 @@ public:    void attachRangesOrLowHighPC(DIE &D,                                 const SmallVectorImpl<InsnRange> &Ranges); -  /// \brief This scope represents inlined body of a function. Construct +  /// This scope represents inlined body of a function. Construct    /// DIE to represent this concrete inlined copy of the function.    DIE *constructInlinedScopeDIE(LexicalScope *Scope); -  /// \brief Construct new DW_TAG_lexical_block for this scope and +  /// Construct new DW_TAG_lexical_block for this scope and    /// attach DW_AT_low_pc/DW_AT_high_pc labels.    DIE *constructLexicalScopeDIE(LexicalScope *Scope); @@ -196,14 +199,14 @@ public:                                SmallVectorImpl<DIE *> &Children,                                bool *HasNonScopeChildren = nullptr); -  /// \brief Construct a DIE for this subprogram scope. +  /// Construct a DIE for this subprogram scope.    
void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope);    DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);    void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); -  /// \brief Construct import_module DIE. +  /// Construct import_module DIE.    DIE *constructImportedEntityDIE(const DIImportedEntity *Module);    void finishSubprogramDefinition(const DISubprogram *SP); @@ -214,11 +217,18 @@ public:    DbgVariable *getExistingAbstractVariable(InlinedVariable IV,                                             const DILocalVariable *&Cleansed);    DbgVariable *getExistingAbstractVariable(InlinedVariable IV); -  void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope); +  void createAbstractVariable(const DILocalVariable *Var, LexicalScope *Scope);    /// Set the skeleton unit associated with this unit.    void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } +  unsigned getHeaderSize() const override { +    // DWARF v5 added the DWO ID to the header for split/skeleton units. +    unsigned DWOIdSize = +        DD->getDwarfVersion() >= 5 && DD->useSplitDwarf() ? sizeof(uint64_t) +                                                          : 0; +    return DwarfUnit::getHeaderSize() + DWOIdSize; +  }    unsigned getLength() {      return sizeof(uint32_t) + // Length field          getHeaderSize() + getUnitDie().getSize(); @@ -290,6 +300,9 @@ public:    void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; }    const MCSymbol *getBaseAddress() const { return BaseAddress; } +  uint64_t getDWOId() const { return DWOId; } +  void setDWOId(uint64_t DwoId) { DWOId = DwoId; } +    bool hasDwarfPubSections() const;  }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 2c9c7d4f3146..8761fae9dd22 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -16,7 +16,6 @@  #include "DIEHash.h"  #include "DebugLocEntry.h"  #include "DebugLocStream.h" -#include "DwarfAccelTable.h"  #include "DwarfCompileUnit.h"  #include "DwarfExpression.h"  #include "DwarfFile.h" @@ -31,6 +30,7 @@  #include "llvm/ADT/Triple.h"  #include "llvm/ADT/Twine.h"  #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AccelTable.h"  #include "llvm/CodeGen/AsmPrinter.h"  #include "llvm/CodeGen/DIE.h"  #include "llvm/CodeGen/LexicalScopes.h" @@ -39,7 +39,6 @@  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineModuleInfo.h"  #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/IR/Constants.h" @@ -66,6 +65,7 @@  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/Timer.h"  #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include "llvm/Target/TargetMachine.h"  #include "llvm/Target/TargetOptions.h"  #include <algorithm> @@ -94,6 +94,11 @@ static cl::opt<bool> GenerateARangeSection("generate-arange-section",                                             cl::desc("Generate dwarf aranges"),                                             cl::init(false)); +static cl::opt<bool> +    GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, +                           cl::desc("Generate DWARF4 type units."), +                           cl::init(false)); +  static cl::opt<bool> SplitDwarfCrossCuReferences(     
 "split-dwarf-cross-cu-references", cl::Hidden,      cl::desc("Enable cross-cu references in DWO files"), cl::init(false)); @@ -107,14 +112,40 @@ static cl::opt<DefaultOnOff> UnknownLocations(                 clEnumVal(Enable, "In all cases"), clEnumVal(Disable, "Never")),      cl::init(Default)); +static cl::opt<AccelTableKind> AccelTables( +    "accel-tables", cl::Hidden, cl::desc("Output dwarf accelerator tables."), +    cl::values(clEnumValN(AccelTableKind::Default, "Default", +                          "Default for platform"), +               clEnumValN(AccelTableKind::None, "Disable", "Disabled."), +               clEnumValN(AccelTableKind::Apple, "Apple", "Apple"), +               clEnumValN(AccelTableKind::Dwarf, "Dwarf", "DWARF")), +    cl::init(AccelTableKind::Default)); +  static cl::opt<DefaultOnOff> -DwarfAccelTables("dwarf-accel-tables", cl::Hidden, -                 cl::desc("Output prototype dwarf accelerator tables."), +DwarfInlinedStrings("dwarf-inlined-strings", cl::Hidden, +                 cl::desc("Use inlined strings rather than string section."),                   cl::values(clEnumVal(Default, "Default for platform"),                              clEnumVal(Enable, "Enabled"),                              clEnumVal(Disable, "Disabled")),                   cl::init(Default)); +static cl::opt<bool> +    NoDwarfPubSections("no-dwarf-pub-sections", cl::Hidden, +                       cl::desc("Disable emission of DWARF pub sections."), +                       cl::init(false)); + +static cl::opt<bool> +    NoDwarfRangesSection("no-dwarf-ranges-section", cl::Hidden, +                         cl::desc("Disable emission .debug_ranges section."), +                         cl::init(false)); + +static cl::opt<DefaultOnOff> DwarfSectionsAsReferences( +    "dwarf-sections-as-references", cl::Hidden, +    cl::desc("Use sections+offset as references rather than labels."), +    cl::values(clEnumVal(Default, "Default for platform"), +               clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")), +    cl::init(Default)); +  enum LinkageNameOption {    DefaultLinkageNames,    AllLinkageNames, @@ -215,11 +246,11 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {                          return A.Expr->isFragment();                        }) &&           "multiple FI expressions without DW_OP_LLVM_fragment"); -  std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(), -            [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool { -              return A.Expr->getFragmentInfo()->OffsetInBits < -                     B.Expr->getFragmentInfo()->OffsetInBits; -            }); +  llvm::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(), +             [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool { +               return A.Expr->getFragmentInfo()->OffsetInBits < +                      B.Expr->getFragmentInfo()->OffsetInBits; +             });    return FrameIndexExprs;  } @@ -258,23 +289,34 @@ void DbgVariable::addMMIEntry(const DbgVariable &V) {           "conflicting locations for variable");  } -static const DwarfAccelTable::Atom TypeAtoms[] = { -    DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), -    DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), -    DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; +static AccelTableKind computeAccelTableKind(unsigned DwarfVersion, +                                            bool GenerateTypeUnits, +           
+                                            DebuggerKind Tuning,
+                                            const Triple &TT) {
+  // Honor an explicit request.
+  if (AccelTables != AccelTableKind::Default)
+    return AccelTables;
+
+  // Accelerator tables with type units are currently not supported.
+  if (GenerateTypeUnits)
+    return AccelTableKind::None;
+
+  // Accelerator tables get emitted if targeting DWARF v5 or LLDB. DWARF v5
+  // always implies debug_names. For lower standard versions we use Apple
+  // accelerator tables on Apple platforms and debug_names elsewhere.
+  if (DwarfVersion >= 5)
+    return AccelTableKind::Dwarf;
+  if (Tuning == DebuggerKind::LLDB)
+    return TT.isOSBinFormatMachO() ? AccelTableKind::Apple
+                                   : AccelTableKind::Dwarf;
+  return AccelTableKind::None;
+}
 
 DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
     : DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
       InfoHolder(A, "info_string", DIEValueAllocator),
       SkeletonHolder(A, "skel_string", DIEValueAllocator),
-      IsDarwin(A->TM.getTargetTriple().isOSDarwin()),
-      AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
-                                       dwarf::DW_FORM_data4)),
-      AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
-                                      dwarf::DW_FORM_data4)),
-      AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
-                                           dwarf::DW_FORM_data4)),
-      AccelTypes(TypeAtoms) {
+      IsDarwin(A->TM.getTargetTriple().isOSDarwin()) {
   const Triple &TT = Asm->TM.getTargetTriple();
 
   // Make sure we know our "debugger tuning."  The target option takes
@@ -288,11 +330,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   else
     DebuggerTuning = DebuggerKind::GDB;
 
-  // Turn on accelerator tables for LLDB by default.
-  if (DwarfAccelTables == Default)
-    HasDwarfAccelTables = tuneForLLDB();
+  if (DwarfInlinedStrings == Default)
+    UseInlineStrings = TT.isNVPTX();
   else
-    HasDwarfAccelTables = DwarfAccelTables == Enable;
+    UseInlineStrings = DwarfInlinedStrings == Enable;
+
+  UseLocSection = !TT.isNVPTX();
 
   HasAppleExtensionAttributes = tuneForLLDB();
 
@@ -308,8 +351,23 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
   unsigned DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
                                     : MMI->getModule()->getDwarfVersion();
-  // Use dwarf 4 by default if nothing is requested.
-  DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION;
+  // Use dwarf 4 by default if nothing is requested. For NVPTX, use dwarf 2.
+  DwarfVersion =
+      TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION);
+
+  UsePubSections = !NoDwarfPubSections && !TT.isNVPTX();
+  UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX();
+
+  // Use sections as references. Force for NVPTX.
+  if (DwarfSectionsAsReferences == Default)
+    UseSectionsAsReferences = TT.isNVPTX();
+  else
+    UseSectionsAsReferences = DwarfSectionsAsReferences == Enable;
+
+  GenerateTypeUnits = GenerateDwarfTypeUnits;
+
+  TheAccelTableKind = computeAccelTableKind(
+      DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple());
 
   // Work around a GDB bug.
GDB doesn't support the standard opcode;    // SCE doesn't support GNU's; LLDB prefers the standard opcode, which @@ -321,6 +379,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)    // GDB does not fully support the DWARF 4 representation for bitfields.    UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB(); +  // The DWARF v5 string offsets table has - possibly shared - contributions +  // from each compile and type unit each preceded by a header. The string +  // offsets table used by the pre-DWARF v5 split-DWARF implementation uses +  // a monolithic string offsets table without any header. +  UseSegmentedStringOffsetsTable = DwarfVersion >= 5; +    Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);  } @@ -355,17 +419,18 @@ static StringRef getObjCMethodName(StringRef In) {  }  // Add the various names to the Dwarf accelerator table names. -// TODO: Determine whether or not we should add names for programs -// that do not have a DW_AT_name or DW_AT_linkage_name field - this -// is only slightly different than the lookup of non-standard ObjC names.  void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {    if (!SP->isDefinition())      return; -  addAccelName(SP->getName(), Die); -  // If the linkage name is different than the name, go ahead and output -  // that as well into the name table. -  if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName()) +  if (SP->getName() != "") +    addAccelName(SP->getName(), Die); + +  // If the linkage name is different than the name, go ahead and output that as +  // well into the name table. Only do that if we are going to actually emit +  // that name. +  if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() && +      (useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP)))      addAccelName(SP->getLinkageName(), Die);    // If this is an Objective-C selector name add it to the ObjC accelerator @@ -471,8 +536,9 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {    // explicitly describe the directory of all files, never relying on the    // compilation directory.    if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU) -    Asm->OutStreamer->getContext().setMCLineTableCompilationDir( -        NewCU.getUniqueID(), CompilationDir); +    Asm->OutStreamer->emitDwarfFile0Directive( +        CompilationDir, FN, NewCU.getMD5AsBytes(DIUnit->getFile()), +        DIUnit->getSource(), NewCU.getUniqueID());    StringRef Producer = DIUnit->getProducer();    StringRef Flags = DIUnit->getFlags(); @@ -486,6 +552,10 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {                  DIUnit->getSourceLanguage());    NewCU.addString(Die, dwarf::DW_AT_name, FN); +  // Add DW_str_offsets_base to the unit DIE, except for split units. +  if (useSegmentedStringOffsetsTable() && !useSplitDwarf()) +    NewCU.addStringOffsetsStart(); +    if (!useSplitDwarf()) {      NewCU.initStmtList(); @@ -541,21 +611,22 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,  /// Sort and unique GVEs by comparing their fragment offset.  static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &  sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) { -  std::sort(GVEs.begin(), GVEs.end(), -            [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { -              // Sort order: first null exprs, then exprs without fragment -              // info, then sort by fragment offset in bits. 
-              // FIXME: Come up with a more comprehensive comparator so -              // the sorting isn't non-deterministic, and so the following -              // std::unique call works correctly. -              if (!A.Expr || !B.Expr) -                return !!B.Expr; -              auto FragmentA = A.Expr->getFragmentInfo(); -              auto FragmentB = B.Expr->getFragmentInfo(); -              if (!FragmentA || !FragmentB) -                return !!FragmentB; -              return FragmentA->OffsetInBits < FragmentB->OffsetInBits; -            }); +  llvm::sort(GVEs.begin(), GVEs.end(), +             [](DwarfCompileUnit::GlobalExpr A, +                DwarfCompileUnit::GlobalExpr B) { +               // Sort order: first null exprs, then exprs without fragment +               // info, then sort by fragment offset in bits. +               // FIXME: Come up with a more comprehensive comparator so +               // the sorting isn't non-deterministic, and so the following +               // std::unique call works correctly. +               if (!A.Expr || !B.Expr) +                 return !!B.Expr; +               auto FragmentA = A.Expr->getFragmentInfo(); +               auto FragmentB = B.Expr->getFragmentInfo(); +               if (!FragmentA || !FragmentB) +                 return !!FragmentB; +               return FragmentA->OffsetInBits < FragmentB->OffsetInBits; +             });    GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),                           [](DwarfCompileUnit::GlobalExpr A,                              DwarfCompileUnit::GlobalExpr B) { @@ -590,6 +661,19 @@ void DwarfDebug::beginModule() {        GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()});    } +  // Create the symbol that designates the start of the unit's contribution +  // to the string offsets table. In a split DWARF scenario, only the skeleton +  // unit has the DW_AT_str_offsets_base attribute (and hence needs the symbol). +  if (useSegmentedStringOffsetsTable()) +    (useSplitDwarf() ? SkeletonHolder : InfoHolder) +        .setStringOffsetsStartSym(Asm->createTempSymbol("str_offsets_base")); + +  // Create the symbol that designates the start of the DWARF v5 range list +  // table. It is located past the header and before the offsets table. +  if (getDwarfVersion() >= 5) +    (useSplitDwarf() ? SkeletonHolder : InfoHolder) +        .setRnglistsTableBaseSym(Asm->createTempSymbol("rnglists_table_base")); +    for (DICompileUnit *CUNode : M->debug_compile_units()) {      // FIXME: Move local imported entities into a list attached to the      // subprogram, then this search won't be needed and a @@ -694,11 +778,15 @@ void DwarfDebug::finalizeModuleInfo() {        // Emit a unique identifier for this CU.        
uint64_t ID =            DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie()); -      TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id, -                    dwarf::DW_FORM_data8, ID); -      SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, -                    dwarf::DW_FORM_data8, ID); - +      if (getDwarfVersion() >= 5) { +        TheCU.setDWOId(ID); +        SkCU->setDWOId(ID); +      } else { +        TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id, +                      dwarf::DW_FORM_data8, ID); +        SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, +                      dwarf::DW_FORM_data8, ID); +      }        // We don't keep track of which addresses are used in which CU so this        // is a bit pessimistic under LTO.        if (!AddrPool.isEmpty()) { @@ -706,7 +794,7 @@ void DwarfDebug::finalizeModuleInfo() {          SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,                                Sym, Sym);        } -      if (!SkCU->getRangeLists().empty()) { +      if (getDwarfVersion() < 5 && !SkCU->getRangeLists().empty()) {          const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();          SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,                                Sym, Sym); @@ -721,7 +809,7 @@ void DwarfDebug::finalizeModuleInfo() {      // ranges for all subprogram DIEs for mach-o.      DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;      if (unsigned NumRanges = TheCU.getRanges().size()) { -      if (NumRanges > 1) +      if (NumRanges > 1 && useRangesSection())          // A DW_AT_low_pc attribute may also be specified in combination with          // DW_AT_ranges to specify the default base address for use in          // location lists (see Section 2.6.2) and range lists (see Section @@ -732,6 +820,10 @@ void DwarfDebug::finalizeModuleInfo() {        U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());      } +    if (getDwarfVersion() >= 5 && !useSplitDwarf() && +        !U.getRangeLists().empty()) +      U.addRnglistsBase(); +      auto *CUNode = cast<DICompileUnit>(P.first);      // If compile Unit has macros, emit "DW_AT_macro_info" attribute.      if (CUNode->getMacros()) @@ -799,11 +891,20 @@ void DwarfDebug::endModule() {    }    // Emit info into the dwarf accelerator table sections. -  if (useDwarfAccelTables()) { +  switch (getAccelTableKind()) { +  case AccelTableKind::Apple:      emitAccelNames();      emitAccelObjC();      emitAccelNamespaces();      emitAccelTypes(); +    break; +  case AccelTableKind::Dwarf: +    emitAccelDebugNames(); +    break; +  case AccelTableKind::None: +    break; +  case AccelTableKind::Default: +    llvm_unreachable("Default should have already been resolved.");    }    // Emit the pubnames and pubtypes sections if requested. @@ -887,7 +988,7 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {    llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");  } -/// \brief If this and Next are describing different fragments of the same +/// If this and Next are describing different fragments of the same  /// variable, merge them by appending Next's values to the current  /// list of values.  /// Return true if the merge was successful. @@ -903,8 +1004,7 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {      // sorted.      
for (unsigned i = 0, j = 0; i < Values.size(); ++i) {
       for (; j < Next.Values.size(); ++j) {
-        int res = DebugHandlerBase::fragmentCmp(
-            cast<DIExpression>(Values[i].Expression),
+        int res = cast<DIExpression>(Values[i].Expression)->fragmentCmp(
             cast<DIExpression>(Next.Values[j].Expression));
         if (res == 0) // The two expressions overlap, we can't merge.
           return false;
@@ -967,7 +1067,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
     // If this fragment overlaps with any open ranges, truncate them.
     const DIExpression *DIExpr = Begin->getDebugExpression();
     auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) {
-      return fragmentsOverlap(DIExpr, R.getExpression());
+      return DIExpr->fragmentsOverlap(R.getExpression());
     });
     OpenRanges.erase(Last, OpenRanges.end());
@@ -983,7 +1083,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
       EndLabel = getLabelBeforeInsn(std::next(I)->first);
     assert(EndLabel && "Forgot label after instruction ending a range!");
 
-    DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
+    LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
 
     auto Value = getDebugLocValue(Begin);
     DebugLocEntry Loc(StartLabel, EndLabel, Value);
@@ -1012,7 +1112,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
     // Attempt to coalesce the ranges of two otherwise identical
     // DebugLocEntries.
     auto CurEntry = DebugLoc.rbegin();
-    DEBUG({
+    LLVM_DEBUG({
       dbgs() << CurEntry->getValues().size() << " Values:\n";
       for (auto &Value : CurEntry->getValues())
         Value.dump();
@@ -1131,6 +1231,9 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
       RegVar->initializeDbgValue(MInsn);
       continue;
     }
+    // Do not emit location lists if the .debug_loc section is disabled.
+    if (!useLocSection())
+      continue;
 
     // Handle multiple DBG_VALUE instructions describing one variable.
     DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn);
@@ -1151,10 +1254,12 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
   }
 
   // Collect info for variables that were optimized out.
-  for (const DILocalVariable *DV : SP->getVariables()) {
-    if (Processed.insert(InlinedVariable(DV, nullptr)).second)
-      if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope()))
-        createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr));
+  for (const DINode *DN : SP->getRetainedNodes()) {
+    if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
+      if (Processed.insert(InlinedVariable(DV, nullptr)).second)
+        if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope()))
+          createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr));
+    }
   }
 }
@@ -1168,7 +1273,9 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
     return;
 
   // Check if source location changes, but ignore DBG_VALUE and CFI locations.
-  if (MI->isMetaInstruction())
+  // If the instruction is part of the function frame setup code, do not emit
+  // any line record, as there is no correspondence with any user code.
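+  // (Typical FrameSetup-flagged instructions are the prologue's callee-save
+  // stores and stack-pointer adjustment.)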
+  if (MI->isMetaInstruction() || MI->getFlag(MachineInstr::FrameSetup))      return;    const DebugLoc &DL = MI->getDebugLoc();    // When we emit a line-0 record, we don't update PrevInstLoc; so look at @@ -1333,14 +1440,16 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {    // Construct abstract scopes.    for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {      auto *SP = cast<DISubprogram>(AScope->getScopeNode()); -    // Collect info for variables that were optimized out. -    for (const DILocalVariable *DV : SP->getVariables()) { -      if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) -        continue; -      ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr), -                                      DV->getScope()); -      assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes -             && "ensureAbstractVariableIsCreated inserted abstract scopes"); +    for (const DINode *DN : SP->getRetainedNodes()) { +      if (auto *DV = dyn_cast<DILocalVariable>(DN)) { +        // Collect info for variables that were optimized out. +        if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) +          continue; +        ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr), +                                        DV->getScope()); +        assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes +               && "ensureAbstractVariableIsCreated inserted abstract scopes"); +      }      }      constructAbstractSubprogramScopeDIE(TheCU, AScope);    } @@ -1366,21 +1475,19 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {  void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,                                    unsigned Flags) {    StringRef Fn; -  StringRef Dir; -  unsigned Src = 1; +  unsigned FileNo = 1;    unsigned Discriminator = 0;    if (auto *Scope = cast_or_null<DIScope>(S)) {      Fn = Scope->getFilename(); -    Dir = Scope->getDirectory();      if (Line != 0 && getDwarfVersion() >= 4)        if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))          Discriminator = LBF->getDiscriminator();      unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID(); -    Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) -              .getOrCreateSourceID(Fn, Dir); +    FileNo = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) +              .getOrCreateSourceID(Scope->getFile());    } -  Asm->OutStreamer->EmitDwarfLocDirective(Src, Line, Col, Flags, 0, +  Asm->OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0,                                            Discriminator, Fn);  } @@ -1401,13 +1508,30 @@ void DwarfDebug::emitAbbreviations() {    Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());  } -void DwarfDebug::emitAccel(DwarfAccelTable &Accel, MCSection *Section, +void DwarfDebug::emitStringOffsetsTableHeader() { +  DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; +  Holder.getStringPool().emitStringOffsetsTableHeader( +      *Asm, Asm->getObjFileLowering().getDwarfStrOffSection(), +      Holder.getStringOffsetsStartSym()); +} + +template <typename AccelTableT> +void DwarfDebug::emitAccel(AccelTableT &Accel, MCSection *Section,                             StringRef TableName) { -  Accel.FinalizeTable(Asm, TableName);    Asm->OutStreamer->SwitchSection(Section);    // Emit the full data. 
-  Accel.emit(Asm, Section->getBeginSymbol(), this); +  emitAppleAccelTable(Asm, Accel, TableName, Section->getBeginSymbol()); +} + +void DwarfDebug::emitAccelDebugNames() { +  // Don't emit anything if we have no compilation units to index. +  if (getUnits().empty()) +    return; + +  Asm->OutStreamer->SwitchSection( +      Asm->getObjFileLowering().getDwarfDebugNamesSection()); +  emitDWARF5AccelTable(Asm, AccelDebugNames, *this, getUnits());  }  // Emit visible names into a hashed accelerator table section. @@ -1525,6 +1649,14 @@ void DwarfDebug::emitDebugPubSections() {    }  } +void DwarfDebug::emitSectionReference(const DwarfCompileUnit &CU) { +  if (useSectionsAsReferences()) +    Asm->EmitDwarfOffset(CU.getSection()->getBeginSymbol(), +                         CU.getDebugSectionOffset()); +  else +    Asm->emitDwarfSymbolReference(CU.getLabelBegin()); +} +  void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,                                       DwarfCompileUnit *TheU,                                       const StringMap<const DIE *> &Globals) { @@ -1540,13 +1672,13 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,    Asm->OutStreamer->EmitLabel(BeginLabel);    Asm->OutStreamer->AddComment("DWARF Version"); -  Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); +  Asm->emitInt16(dwarf::DW_PUBNAMES_VERSION);    Asm->OutStreamer->AddComment("Offset of Compilation Unit Info"); -  Asm->emitDwarfSymbolReference(TheU->getLabelBegin()); +  emitSectionReference(*TheU);    Asm->OutStreamer->AddComment("Compilation Unit Length"); -  Asm->EmitInt32(TheU->getLength()); +  Asm->emitInt32(TheU->getLength());    // Emit the pubnames for this compilation unit.    for (const auto &GI : Globals) { @@ -1554,14 +1686,14 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,      const DIE *Entity = GI.second;      Asm->OutStreamer->AddComment("DIE offset"); -    Asm->EmitInt32(Entity->getOffset()); +    Asm->emitInt32(Entity->getOffset());      if (GnuStyle) {        dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);        Asm->OutStreamer->AddComment(            Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +            dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); -      Asm->EmitInt8(Desc.toBits()); +      Asm->emitInt8(Desc.toBits());      }      Asm->OutStreamer->AddComment("External Name"); @@ -1569,14 +1701,20 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,    }    Asm->OutStreamer->AddComment("End Mark"); -  Asm->EmitInt32(0); +  Asm->emitInt32(0);    Asm->OutStreamer->EmitLabel(EndLabel);  }  /// Emit null-terminated strings into a debug str section.  void DwarfDebug::emitDebugStr() { +  MCSection *StringOffsetsSection = nullptr; +  if (useSegmentedStringOffsetsTable()) { +    emitStringOffsetsTableHeader(); +    StringOffsetsSection = Asm->getObjFileLowering().getDwarfStrOffSection(); +  }    DwarfFile &Holder = useSplitDwarf() ? 
SkeletonHolder : InfoHolder; -  Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); +  Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection(), +                     StringOffsetsSection, /* UseRelativeOffsets = */ true);  }  void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, @@ -1589,7 +1727,6 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,  }  static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, -                              ByteStreamer &Streamer,                                const DebugLocEntry::Value &Value,                                DwarfExpression &DwarfExpr) {    auto *DIExpr = Value.getExpression(); @@ -1634,11 +1771,11 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,             "fragments are expected to be sorted");      for (auto Fragment : Values) -      emitDebugLocValue(AP, BT, Streamer, Fragment, DwarfExpr); +      emitDebugLocValue(AP, BT, Fragment, DwarfExpr);    } else {      assert(Values.size() == 1 && "only fragments may have >1 value"); -    emitDebugLocValue(AP, BT, Streamer, Value, DwarfExpr); +    emitDebugLocValue(AP, BT, Value, DwarfExpr);    }    DwarfExpr.finalize();  } @@ -1646,7 +1783,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,  void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {    // Emit the size.    Asm->OutStreamer->AddComment("Loc expr size"); -  Asm->EmitInt16(DebugLocs.getBytes(Entry).size()); +  Asm->emitInt16(DebugLocs.getBytes(Entry).size());    // Emit the entry.    APByteStreamer Streamer(*Asm); @@ -1694,14 +1831,14 @@ void DwarfDebug::emitDebugLocDWO() {        // rather than two. We could get fancier and try to, say, reuse an        // address we know we've emitted elsewhere (the start of the function?        // The start of the CU or CU subrange that encloses this range?) -      Asm->EmitInt8(dwarf::DW_LLE_startx_length); +      Asm->emitInt8(dwarf::DW_LLE_startx_length);        unsigned idx = AddrPool.getIndex(Entry.BeginSym);        Asm->EmitULEB128(idx);        Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);        emitDebugLocEntryLocation(Entry);      } -    Asm->EmitInt8(dwarf::DW_LLE_end_of_list); +    Asm->emitInt8(dwarf::DW_LLE_end_of_list);    }  } @@ -1752,7 +1889,7 @@ void DwarfDebug::emitDebugARanges() {      }      // Sort the symbols by offset within the section. -    std::sort( +    std::stable_sort(          List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) {            unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;            unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; @@ -1801,10 +1938,10 @@ void DwarfDebug::emitDebugARanges() {    }    // Sort the CU list (again, to ensure consistent output order). -  std::sort(CUs.begin(), CUs.end(), -            [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) { -              return A->getUniqueID() < B->getUniqueID(); -            }); +  llvm::sort(CUs.begin(), CUs.end(), +             [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) { +               return A->getUniqueID() < B->getUniqueID(); +             });    // Emit an arange table for each CU we used.    for (DwarfCompileUnit *CU : CUs) { @@ -1832,15 +1969,15 @@ void DwarfDebug::emitDebugARanges() {      // For each compile unit, write the list of spans it covers.      
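     // (Each set is: unit length, version, offset of the CU in .debug_info,
     // address size, segment size, alignment padding, then [address, length]
     // pairs terminated by a zero pair.)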
Asm->OutStreamer->AddComment("Length of ARange Set"); -    Asm->EmitInt32(ContentSize); +    Asm->emitInt32(ContentSize);      Asm->OutStreamer->AddComment("DWARF Arange version number"); -    Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); +    Asm->emitInt16(dwarf::DW_ARANGES_VERSION);      Asm->OutStreamer->AddComment("Offset Into Debug Info Section"); -    Asm->emitDwarfSymbolReference(CU->getLabelBegin()); +    emitSectionReference(*CU);      Asm->OutStreamer->AddComment("Address Size (in bytes)"); -    Asm->EmitInt8(PtrSize); +    Asm->emitInt8(PtrSize);      Asm->OutStreamer->AddComment("Segment Size (in bytes)"); -    Asm->EmitInt8(0); +    Asm->emitInt8(0);      Asm->OutStreamer->emitFill(Padding, 0xff); @@ -1867,17 +2004,151 @@ void DwarfDebug::emitDebugARanges() {    }  } -/// Emit address ranges into a debug ranges section. +/// Emit a single range list. We handle both DWARF v5 and earlier. +static void emitRangeList(AsmPrinter *Asm, DwarfCompileUnit *CU, +                          const RangeSpanList &List) { + +  auto DwarfVersion = CU->getDwarfVersion(); +  // Emit our symbol so we can find the beginning of the range. +  Asm->OutStreamer->EmitLabel(List.getSym()); +  // Gather all the ranges that apply to the same section so they can share +  // a base address entry. +  MapVector<const MCSection *, std::vector<const RangeSpan *>> SectionRanges; +  // Size for our labels. +  auto Size = Asm->MAI->getCodePointerSize(); + +  for (const RangeSpan &Range : List.getRanges()) +    SectionRanges[&Range.getStart()->getSection()].push_back(&Range); + +  auto *CUBase = CU->getBaseAddress(); +  bool BaseIsSet = false; +  for (const auto &P : SectionRanges) { +    // Don't bother with a base address entry if there's only one range in +    // this section in this range list - for example ranges for a CU will +    // usually consist of single regions from each of many sections +    // (-ffunction-sections, or just C++ inline functions) except under LTO +    // or optnone where there may be holes in a single CU's section +    // contributions. +    auto *Base = CUBase; +    if (!Base && P.second.size() > 1 && +        (UseDwarfRangesBaseAddressSpecifier || DwarfVersion >= 5)) { +      BaseIsSet = true; +      // FIXME/use care: This may not be a useful base address if it's not +      // the lowest address/range in this object. +      Base = P.second.front()->getStart(); +      if (DwarfVersion >= 5) { +        Asm->OutStreamer->AddComment("DW_RLE_base_address"); +        Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_address, 1); +      } else +        Asm->OutStreamer->EmitIntValue(-1, Size); +      Asm->OutStreamer->AddComment("  base address"); +      Asm->OutStreamer->EmitSymbolValue(Base, Size); +    } else if (BaseIsSet && DwarfVersion < 5) { +      BaseIsSet = false; +      assert(!Base); +      Asm->OutStreamer->EmitIntValue(-1, Size); +      Asm->OutStreamer->EmitIntValue(0, Size); +    } + +    for (const auto *RS : P.second) { +      const MCSymbol *Begin = RS->getStart(); +      const MCSymbol *End = RS->getEnd(); +      assert(Begin && "Range without a begin symbol?"); +      assert(End && "Range without an end symbol?"); +      if (Base) { +        if (DwarfVersion >= 5) { +          // Emit DW_RLE_offset_pair when we have a base. 
+          Asm->OutStreamer->AddComment("DW_RLE_offset_pair"); +          Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_offset_pair, 1); +          Asm->OutStreamer->AddComment("  starting offset"); +          Asm->EmitLabelDifferenceAsULEB128(Begin, Base); +          Asm->OutStreamer->AddComment("  ending offset"); +          Asm->EmitLabelDifferenceAsULEB128(End, Base); +        } else { +          Asm->EmitLabelDifference(Begin, Base, Size); +          Asm->EmitLabelDifference(End, Base, Size); +        } +      } else if (DwarfVersion >= 5) { +        Asm->OutStreamer->AddComment("DW_RLE_start_length"); +        Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_start_length, 1); +        Asm->OutStreamer->AddComment("  start"); +        Asm->OutStreamer->EmitSymbolValue(Begin, Size); +        Asm->OutStreamer->AddComment("  length"); +        Asm->EmitLabelDifferenceAsULEB128(End, Begin); +      } else { +        Asm->OutStreamer->EmitSymbolValue(Begin, Size); +        Asm->OutStreamer->EmitSymbolValue(End, Size); +      } +    } +  } +  if (DwarfVersion >= 5) { +    Asm->OutStreamer->AddComment("DW_RLE_end_of_list"); +    Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_end_of_list, 1); +  } else { +    // Terminate the list with two 0 values. +    Asm->OutStreamer->EmitIntValue(0, Size); +    Asm->OutStreamer->EmitIntValue(0, Size); +  } +} + +// Emit the header of a DWARF 5 range list table. Returns the symbol that +// designates the end of the table for the caller to emit when the table is +// complete. +static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, DwarfFile &Holder) { +  // The length is described by a starting label right after the length field +  // and an end label. +  MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start"); +  MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end"); +  // Build the range table header, which starts with the length field. +  Asm->EmitLabelDifference(TableEnd, TableStart, 4); +  Asm->OutStreamer->EmitLabel(TableStart); +  // Version number (DWARF v5 and later). +  Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion()); +  // Address size. +  Asm->emitInt8(Asm->MAI->getCodePointerSize()); +  // Segment selector size. +  Asm->emitInt8(0); + +  MCSymbol *RnglistTableBaseSym = Holder.getRnglistsTableBaseSym(); + +  // FIXME: Generate the offsets table and use DW_FORM_rnglistx with the +  // DW_AT_ranges attribute. Until then set the number of offsets to 0. +  Asm->emitInt32(0); +  Asm->OutStreamer->EmitLabel(RnglistTableBaseSym); +  return TableEnd; +} + +/// Emit address ranges into the .debug_ranges section or into the DWARF v5 +/// .debug_rnglists section.  void DwarfDebug::emitDebugRanges() {    if (CUMap.empty())      return; -  // Start the dwarf ranges section. -  Asm->OutStreamer->SwitchSection( -      Asm->getObjFileLowering().getDwarfRangesSection()); +  auto NoRangesPresent = [this]() { +    return llvm::all_of( +        CUMap, [](const decltype(CUMap)::const_iterator::value_type &Pair) { +          return Pair.second->getRangeLists().empty(); +        }); +  }; -  // Size for our labels. -  unsigned char Size = Asm->MAI->getCodePointerSize(); +  if (!useRangesSection()) { +    assert(NoRangesPresent() && "No debug ranges expected."); +    return; +  } + +  if (getDwarfVersion() >= 5 && NoRangesPresent()) +    return; + +  // Start the dwarf ranges section. 
+  MCSymbol *TableEnd = nullptr; +  if (getDwarfVersion() >= 5) { +    Asm->OutStreamer->SwitchSection( +        Asm->getObjFileLowering().getDwarfRnglistsSection()); +    TableEnd = emitRnglistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder +                                                            : InfoHolder); +  } else +    Asm->OutStreamer->SwitchSection( +        Asm->getObjFileLowering().getDwarfRangesSection());    // Grab the specific ranges for the compile units in the module.    for (const auto &I : CUMap) { @@ -1887,61 +2158,12 @@ void DwarfDebug::emitDebugRanges() {        TheCU = Skel;      // Iterate over the misc ranges for the compile units in the module. -    for (const RangeSpanList &List : TheCU->getRangeLists()) { -      // Emit our symbol so we can find the beginning of the range. -      Asm->OutStreamer->EmitLabel(List.getSym()); - -      // Gather all the ranges that apply to the same section so they can share -      // a base address entry. -      MapVector<const MCSection *, std::vector<const RangeSpan *>> MV; -      for (const RangeSpan &Range : List.getRanges()) { -        MV[&Range.getStart()->getSection()].push_back(&Range); -      } - -      auto *CUBase = TheCU->getBaseAddress(); -      bool BaseIsSet = false; -      for (const auto &P : MV) { -        // Don't bother with a base address entry if there's only one range in -        // this section in this range list - for example ranges for a CU will -        // usually consist of single regions from each of many sections -        // (-ffunction-sections, or just C++ inline functions) except under LTO -        // or optnone where there may be holes in a single CU's section -        // contrubutions. -        auto *Base = CUBase; -        if (!Base && P.second.size() > 1 && -            UseDwarfRangesBaseAddressSpecifier) { -          BaseIsSet = true; -          // FIXME/use care: This may not be a useful base address if it's not -          // the lowest address/range in this object. -          Base = P.second.front()->getStart(); -          Asm->OutStreamer->EmitIntValue(-1, Size); -          Asm->OutStreamer->EmitSymbolValue(Base, Size); -        } else if (BaseIsSet) { -          BaseIsSet = false; -          Asm->OutStreamer->EmitIntValue(-1, Size); -          Asm->OutStreamer->EmitIntValue(0, Size); -        } - -        for (const auto *RS : P.second) { -          const MCSymbol *Begin = RS->getStart(); -          const MCSymbol *End = RS->getEnd(); -          assert(Begin && "Range without a begin symbol?"); -          assert(End && "Range without an end symbol?"); -          if (Base) { -            Asm->EmitLabelDifference(Begin, Base, Size); -            Asm->EmitLabelDifference(End, Base, Size); -          } else { -            Asm->OutStreamer->EmitSymbolValue(Begin, Size); -            Asm->OutStreamer->EmitSymbolValue(End, Size); -          } -        } -      } - -      // And terminate the list with two 0 values. -      Asm->OutStreamer->EmitIntValue(0, Size); -      Asm->OutStreamer->EmitIntValue(0, Size); -    } +    for (const RangeSpanList &List : TheCU->getRangeLists()) +      emitRangeList(Asm, TheCU, List);    } + +  if (TableEnd) +    Asm->OutStreamer->EmitLabel(TableEnd);  }  void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { @@ -1963,20 +2185,17 @@ void DwarfDebug::emitMacro(DIMacro &M) {    Asm->OutStreamer->EmitBytes(Name);    if (!Value.empty()) {      // There should be one space between macro name and macro value. 
-    Asm->EmitInt8(' '); +    Asm->emitInt8(' ');      Asm->OutStreamer->EmitBytes(Value);    } -  Asm->EmitInt8('\0'); +  Asm->emitInt8('\0');  }  void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {    assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);    Asm->EmitULEB128(dwarf::DW_MACINFO_start_file);    Asm->EmitULEB128(F.getLine()); -  DIFile *File = F.getFile(); -  unsigned FID = -      U.getOrCreateSourceID(File->getFilename(), File->getDirectory()); -  Asm->EmitULEB128(FID); +  Asm->EmitULEB128(U.getOrCreateSourceID(F.getFile()));    handleMacroNodes(F.getElements(), U);    Asm->EmitULEB128(dwarf::DW_MACINFO_end_file);  } @@ -1995,11 +2214,14 @@ void DwarfDebug::emitDebugMacinfo() {      auto *SkCU = TheCU.getSkeleton();      DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;      auto *CUNode = cast<DICompileUnit>(P.first); -    Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); -    handleMacroNodes(CUNode->getMacros(), U); +    DIMacroNodeArray Macros = CUNode->getMacros(); +    if (!Macros.empty()) { +      Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); +      handleMacroNodes(Macros, U); +    }    }    Asm->OutStreamer->AddComment("End Of Macro List Mark"); -  Asm->EmitInt8(0); +  Asm->emitInt8(0);  }  // DWARF5 Experimental Separate Dwarf emitters. @@ -2017,9 +2239,6 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,    SkeletonHolder.addUnit(std::move(NewU));  } -// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, -// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, -// DW_AT_addr_base, DW_AT_ranges_base.  DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {    auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>( @@ -2029,6 +2248,9 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {    NewCU.initStmtList(); +  if (useSegmentedStringOffsetsTable()) +    NewCU.addStringOffsetsStart(); +    initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit));    return NewCU; @@ -2051,26 +2273,37 @@ void DwarfDebug::emitDebugAbbrevDWO() {  void DwarfDebug::emitDebugLineDWO() {    assert(useSplitDwarf() && "No split dwarf?"); -  Asm->OutStreamer->SwitchSection( +  SplitTypeUnitFileTable.Emit( +      *Asm->OutStreamer, MCDwarfLineTableParams(),        Asm->getObjFileLowering().getDwarfLineDWOSection()); -  SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams()); +} + +void DwarfDebug::emitStringOffsetsTableHeaderDWO() { +  assert(useSplitDwarf() && "No split dwarf?"); +  InfoHolder.getStringPool().emitStringOffsetsTableHeader( +      *Asm, Asm->getObjFileLowering().getDwarfStrOffDWOSection(), +      InfoHolder.getStringOffsetsStartSym());  }  // Emit the .debug_str.dwo section for separated dwarf. This contains the  // string section and is identical in format to traditional .debug_str  // sections.  
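Before the .debug_str.dwo emission below, stepping back to the macro emission just above: a .debug_macinfo define/undef record is an entry-type byte, a ULEB128 line number, and a NUL-terminated string in which exactly one space separates the macro name from its value. A hedged re-creation of that encoding (entry-type constants are from DWARF v4 section 7.22; the helper names are invented):

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

enum : uint8_t {
  DW_MACINFO_define = 0x01,
  DW_MACINFO_undef = 0x02,
  DW_MACINFO_start_file = 0x03,
  DW_MACINFO_end_file = 0x04,
};

static void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (V);
}

// One DW_MACINFO_define record: type, line, "NAME VALUE\0".
static void emitDefine(std::vector<uint8_t> &Out, unsigned Line,
                       const std::string &Name, const std::string &Value) {
  Out.push_back(DW_MACINFO_define);
  emitULEB128(Out, Line);
  std::string Str = Name;
  if (!Value.empty())
    Str += " " + Value; // the single separating space
  for (char C : Str)
    Out.push_back(static_cast<uint8_t>(C));
  Out.push_back(0); // string terminator
}

int main() {
  std::vector<uint8_t> Macinfo;
  emitDefine(Macinfo, 3, "PI", "3.14159"); // e.g. "#define PI 3.14159"
  Macinfo.push_back(0); // end-of-list mark, as in emitDebugMacinfo()
  std::printf("%zu bytes of macinfo\n", Macinfo.size());
  return 0;
}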
void DwarfDebug::emitDebugStrDWO() { +  if (useSegmentedStringOffsetsTable()) +    emitStringOffsetsTableHeaderDWO();    assert(useSplitDwarf() && "No split dwarf?");    MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection();    InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(), -                         OffSec); +                         OffSec, /* UseRelativeOffsets = */ false);  }  MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {    if (!useSplitDwarf())      return nullptr; -  if (SingleCU) -    SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory()); +  const DICompileUnit *DIUnit = CU.getCUNode(); +  SplitTypeUnitFileTable.maybeSetRootFile( +      DIUnit->getDirectory(), DIUnit->getFilename(), +      CU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource());    return &SplitTypeUnitFileTable;  } @@ -2119,10 +2352,16 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,    if (useSplitDwarf())      NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());    else { -    CU.applyStmtList(UnitDie);      NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature)); +    // Non-split type units reuse the compile unit's line table. +    CU.applyStmtList(UnitDie);    } +  // Add DW_AT_str_offsets_base to the type unit DIE, but not for split type +  // units. +  if (useSegmentedStringOffsetsTable() && !useSplitDwarf()) +    NewTU.addStringOffsetsStart(); +    NewTU.setType(NewTU.createTypeDIE(CTy));    if (TopLevelType) { @@ -2157,32 +2396,50 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,    CU.addDIETypeSignature(RefDie, Signature);  } -// Accelerator table mutators - add each name along with its companion -// DIE to the proper table while ensuring that the name that we're going -// to reference is in the string table. We do this since the names we -// add may not only be identical to the names in the DIE. -void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) { -  if (!useDwarfAccelTables()) +// Add the Name along with its companion DIE to the appropriate accelerator +// table (for AccelTableKind::Dwarf it's always AccelDebugNames, for +// AccelTableKind::Apple, we use the table we got as an argument). If +// accelerator tables are disabled, this function does nothing. +template <typename DataT> +void DwarfDebug::addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name, +                                  const DIE &Die) { +  if (getAccelTableKind() == AccelTableKind::None)      return; -  AccelNames.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); + +  DwarfFile &Holder = useSplitDwarf() ? 
SkeletonHolder : InfoHolder; +  DwarfStringPoolEntryRef Ref = +      Holder.getStringPool().getEntry(*Asm, Name); + +  switch (getAccelTableKind()) { +  case AccelTableKind::Apple: +    AppleAccel.addName(Ref, Die); +    break; +  case AccelTableKind::Dwarf: +    AccelDebugNames.addName(Ref, Die); +    break; +  case AccelTableKind::Default: +    llvm_unreachable("Default should have already been resolved."); +  case AccelTableKind::None: +    llvm_unreachable("None handled above"); +  } +} + +void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) { +  addAccelNameImpl(AccelNames, Name, Die);  }  void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) { -  if (!useDwarfAccelTables()) -    return; -  AccelObjC.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); +  // ObjC names go only into the Apple accelerator tables. +  if (getAccelTableKind() == AccelTableKind::Apple) +    addAccelNameImpl(AccelObjC, Name, Die);  }  void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) { -  if (!useDwarfAccelTables()) -    return; -  AccelNamespace.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); +  addAccelNameImpl(AccelNamespace, Name, Die);  }  void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) { -  if (!useDwarfAccelTables()) -    return; -  AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); +  addAccelNameImpl(AccelTypes, Name, Die);  }  uint16_t DwarfDebug::getDwarfVersion() const { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 2ae0b418a91e..0c7be5d27dfe 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -18,7 +18,6 @@  #include "DbgValueHistoryCalculator.h"  #include "DebugHandlerBase.h"  #include "DebugLocStream.h" -#include "DwarfAccelTable.h"  #include "DwarfFile.h"  #include "llvm/ADT/ArrayRef.h"  #include "llvm/ADT/DenseMap.h" @@ -31,6 +30,7 @@  #include "llvm/ADT/StringMap.h"  #include "llvm/ADT/StringRef.h"  #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AccelTable.h"  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/IR/DebugInfoMetadata.h"  #include "llvm/IR/DebugLoc.h" @@ -192,6 +192,14 @@ struct SymbolCU {    DwarfCompileUnit *CU;  }; +/// The kind of accelerator tables we should emit. +enum class AccelTableKind { +  Default, ///< Platform default. +  None,    ///< None. +  Apple,   ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc. +  Dwarf,   ///< DWARF v5 .debug_names. +}; +  /// Collects and handles dwarf debug information.  class DwarfDebug : public DebugHandlerBase {    /// All DIEValues are allocated through this allocator. @@ -255,12 +263,37 @@ class DwarfDebug : public DebugHandlerBase {    /// Whether to emit all linkage names, or just abstract subprograms.    bool UseAllLinkageNames; +  /// Use inlined strings. +  bool UseInlineStrings = false; + +  /// Whether to emit DWARF pub sections or not. +  bool UsePubSections = true; + +  /// Allow emission of the .debug_ranges section. +  bool UseRangesSection = true; + +  /// True if the sections themselves must be used as references, without +  /// creating temporary symbols inside the DWARF sections. +  bool UseSectionsAsReferences = false; + +  /// Allow emission of the .debug_loc section. +  bool UseLocSection = true; + +  /// Generate DWARF v4 type units.
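Returning to the addAccelNameImpl refactoring earlier in this hunk: every name now funnels through a single switch on AccelTableKind, so the Apple-style tables and the DWARF v5 .debug_names table cannot silently diverge. A toy model of that dispatch pattern (the table types here are invented stand-ins, not the LLVM classes):

#include <cstdio>
#include <string>

enum class AccelTableKind { Default, None, Apple, Dwarf };

// Stand-ins for the Apple and DWARF v5 accelerator tables.
struct AppleTable {
  void addName(const std::string &N) { std::printf("apple: %s\n", N.c_str()); }
};
struct DebugNamesTable {
  void addName(const std::string &N) { std::printf("dwarf: %s\n", N.c_str()); }
};

// Single funnel for all name insertions, mirroring addAccelNameImpl: the
// Apple table to use varies per call, the .debug_names table is fixed.
template <typename AppleT>
void addNameImpl(AccelTableKind Kind, AppleT &Apple,
                 DebugNamesTable &DebugNames, const std::string &Name) {
  switch (Kind) {
  case AccelTableKind::Apple:
    Apple.addName(Name);
    break;
  case AccelTableKind::Dwarf:
    DebugNames.addName(Name);
    break;
  case AccelTableKind::Default:
  case AccelTableKind::None:
    break; // Default is resolved earlier; None emits no tables at all
  }
}

int main() {
  AppleTable AppleNames;
  DebugNamesTable DebugNames;
  addNameImpl(AccelTableKind::Dwarf, AppleNames, DebugNames, "main");
  addNameImpl(AccelTableKind::Apple, AppleNames, DebugNames, "main");
  return 0;
}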
+  bool GenerateTypeUnits; +    /// DWARF5 Experimental Options    /// @{ -  bool HasDwarfAccelTables; +  AccelTableKind TheAccelTableKind;    bool HasAppleExtensionAttributes;    bool HasSplitDwarf; +  /// Whether to generate the DWARF v5 string offsets table. +  /// It consists of a series of contributions, each preceded by a header. +  /// The pre-DWARF v5 string offsets table for split dwarf is, in contrast, +  /// a monolithic sequence of string offsets. +  bool UseSegmentedStringOffsetsTable; +    /// Separated Dwarf Variables    /// In general these will all be for bits that are left in the    /// original object file, rather than things that are meant @@ -283,10 +316,12 @@ class DwarfDebug : public DebugHandlerBase {    AddressPool AddrPool; -  DwarfAccelTable AccelNames; -  DwarfAccelTable AccelObjC; -  DwarfAccelTable AccelNamespace; -  DwarfAccelTable AccelTypes; +  /// Accelerator tables. +  AccelTable<DWARF5AccelTableData> AccelDebugNames; +  AccelTable<AppleAccelTableOffsetData> AccelNames; +  AccelTable<AppleAccelTableOffsetData> AccelObjC; +  AccelTable<AppleAccelTableOffsetData> AccelNamespace; +  AccelTable<AppleAccelTableTypeData> AccelTypes;    // Identify a debugger for "tuning" the debug info.    DebuggerKind DebuggerTuning = DebuggerKind::Default; @@ -299,9 +334,9 @@ class DwarfDebug : public DebugHandlerBase {    using InlinedVariable = DbgValueHistoryMap::InlinedVariable; -  void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable Var, +  void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV,                                         const MDNode *Scope); -  void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable Var, +  void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable IV,                                                 const MDNode *Scope);    DbgVariable *createConcreteVariable(DwarfCompileUnit &TheCU, @@ -310,6 +345,10 @@ class DwarfDebug : public DebugHandlerBase {    /// Construct a DIE for this abstract scope.    void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope); +  template <typename DataT> +  void addAccelNameImpl(AccelTable<DataT> &AppleAccel, StringRef Name, +                        const DIE &Die); +    void finishVariableDefinitions();    void finishSubprogramDefinitions(); @@ -324,9 +363,15 @@ class DwarfDebug : public DebugHandlerBase {    /// Emit the abbreviation section.    void emitAbbreviations(); +  /// Emit the string offsets table header. +  void emitStringOffsetsTableHeader(); +    /// Emit a specified accelerator table. -  void emitAccel(DwarfAccelTable &Accel, MCSection *Section, -                 StringRef TableName); +  template <typename AccelTableT> +  void emitAccel(AccelTableT &Accel, MCSection *Section, StringRef TableName); + +  /// Emit DWARF v5 accelerator table. +  void emitAccelDebugNames();    /// Emit visible names into a hashed accelerator table section.    void emitAccelNames(); @@ -363,6 +408,9 @@ class DwarfDebug : public DebugHandlerBase {    /// Emit address ranges into a debug ranges section.    void emitDebugRanges(); +  /// Emit range lists into a DWARF v5 debug rnglists section. +  void emitDebugRnglists(); +    /// Emit macros into a debug macinfo section.    
void emitDebugMacinfo();    void emitMacro(DIMacro &M); @@ -375,8 +423,13 @@ class DwarfDebug : public DebugHandlerBase {    void initSkeletonUnit(const DwarfUnit &U, DIE &Die,                          std::unique_ptr<DwarfCompileUnit> NewU); -  /// Construct the split debug info compile unit for the debug info -  /// section. +  /// Construct the split debug info compile unit for the debug info section. +  /// In DWARF v5, the skeleton unit DIE may have the following attributes: +  /// DW_AT_addr_base, DW_AT_comp_dir, DW_AT_dwo_name, DW_AT_high_pc, +  /// DW_AT_low_pc, DW_AT_ranges, DW_AT_stmt_list, and DW_AT_str_offsets_base. +  /// Prior to DWARF v5 it may also have DW_AT_GNU_dwo_id. DW_AT_GNU_dwo_name +  /// is used instead of DW_AT_dwo_name, DW_AT_GNU_addr_base instead of +  /// DW_AT_addr_base, and DW_AT_GNU_ranges_base instead of DW_AT_rnglists_base.    DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);    /// Emit the debug info dwo section. @@ -388,6 +441,9 @@ class DwarfDebug : public DebugHandlerBase {    /// Emit the debug line dwo section.    void emitDebugLineDWO(); +  /// Emit the dwo string offsets table header. +  void emitStringOffsetsTableHeaderDWO(); +    /// Emit the debug str dwo section.    void emitDebugStrDWO(); @@ -422,6 +478,9 @@ class DwarfDebug : public DebugHandlerBase {    void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,                                        DenseSet<InlinedVariable> &P); +  /// Emit the reference to the section. +  void emitSectionReference(const DwarfCompileUnit &CU); +  protected:    /// Gather pre-function debug information.    void beginFunctionImpl(const MachineFunction *MF) override; @@ -478,11 +537,30 @@ public:    /// DWARF4 format.    bool useDWARF2Bitfields() const { return UseDWARF2Bitfields; } +  /// Returns whether to use inline strings. +  bool useInlineStrings() const { return UseInlineStrings; } + +  /// Returns whether GNU pub sections should be emitted. +  bool usePubSections() const { return UsePubSections; } + +  /// Returns whether ranges section should be emitted. +  bool useRangesSection() const { return UseRangesSection; } + +  /// Returns whether to use sections as labels rather than temp symbols. +  bool useSectionsAsReferences() const {    return UseSectionsAsReferences;  } + +  /// Returns whether .debug_loc section should be emitted. +  bool useLocSection() const { return UseLocSection; } + +  /// Returns whether to generate DWARF v4 type units. +  bool generateTypeUnits() const { return GenerateTypeUnits; } +    // Experimental DWARF5 features. -  /// Returns whether or not to emit tables that dwarf consumers can -  /// use to accelerate lookup. -  bool useDwarfAccelTables() const { return HasDwarfAccelTables; } +  /// Returns what kind (if any) of accelerator tables to emit. +  AccelTableKind getAccelTableKind() const { return TheAccelTableKind; }    bool useAppleExtensionAttributes() const {      return HasAppleExtensionAttributes; @@ -492,6 +570,16 @@ public:    /// split dwarf proposal support.    bool useSplitDwarf() const { return HasSplitDwarf; } +  /// Returns whether to generate a string offsets table with (possibly shared) +  /// contributions from each CU and type unit. This implies the use of +  /// DW_FORM_strx* indirect references with DWARF v5 and beyond. Note that +  /// DW_FORM_GNU_str_index is also an indirect reference, but it is used with +  /// a pre-DWARF v5 implementation of split DWARF sections, which uses a +  /// monolithic string offsets table.
+  bool useSegmentedStringOffsetsTable() const { +    return UseSegmentedStringOffsetsTable; +  } +    bool shareAcrossDWOCUs() const;    /// Returns the Dwarf Version. @@ -537,6 +625,9 @@ public:    /// Find the matching DwarfCompileUnit for the given CU DIE.    DwarfCompileUnit *lookupCU(const DIE *Die) { return CUDieMap.lookup(Die); } +  const DwarfCompileUnit *lookupCU(const DIE *Die) const { +    return CUDieMap.lookup(Die); +  }    /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.    /// diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 80d5bd208ed8..b57ea8fc6322 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -70,7 +70,7 @@ public:  };  class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase { -  void emitTypeInfos(unsigned TTypeEncoding) override; +  void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) override;    ARMTargetStreamer &getTargetStreamer();  public: diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 68d25fe37b43..d8d1a5e8f841 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -123,7 +123,10 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,    const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(MachineReg);    unsigned RegSize = TRI.getRegSizeInBits(*RC);    // Keep track of the bits in the register we already emitted, so we -  // can avoid emitting redundant aliasing subregs. +  // can avoid emitting redundant aliasing subregs. Because this is +  // just doing a greedy scan of all subregisters, it is possible that +  // this doesn't find a combination of subregisters that fully cover +  // the register (even though one may exist).    SmallBitVector Coverage(RegSize, false);    for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {      unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR); @@ -143,7 +146,7 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,      if (CurSubReg.test(Coverage)) {        // Emit a piece for any gap in the coverage.        if (Offset > CurPos) -        DwarfRegs.push_back({-1, Offset - CurPos, nullptr}); +        DwarfRegs.push_back({-1, Offset - CurPos, "no DWARF register encoding"});        DwarfRegs.push_back(            {Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"});        if (Offset >= MaxSize) @@ -154,8 +157,13 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,        CurPos = Offset + Size;      }    } - -  return CurPos; +  // Failed to find any DWARF encoding. +  if (CurPos == 0) +    return false; +  // Found a partial or complete DWARF encoding. 
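Before the final pieces are pushed below, the Coverage bit-vector scan above is easiest to see with a toy model: each subregister contributes only bits that are not yet covered, redundant aliases are skipped, and any gap becomes a piece with no DWARF register encoding. Because the scan is greedy, it can miss a full cover even when one exists, as the new comment notes. A simplified sketch with an invented register layout (std::vector<bool> in place of SmallBitVector):

#include <algorithm>
#include <cstdio>
#include <vector>

struct SubReg {
  unsigned Offset; // bit offset within the full register
  unsigned Size;   // width in bits
};

int main() {
  const unsigned RegSize = 128; // pretend 128-bit register
  // Greedy walk: {0,64} is emitted, {0,32} is a redundant alias,
  // {96,32} leaves a 32-bit hole at bits 64..95.
  std::vector<SubReg> SubRegs = {{0, 64}, {0, 32}, {96, 32}};
  std::vector<bool> Coverage(RegSize, false);
  unsigned CurPos = 0;
  for (const SubReg &SR : SubRegs) {
    bool AddsBits = false;
    for (unsigned I = SR.Offset; I < SR.Offset + SR.Size; ++I)
      AddsBits |= !Coverage[I];
    if (!AddsBits)
      continue; // nothing new: skip the aliasing subregister
    if (SR.Offset > CurPos) // gap with no DWARF register encoding
      std::printf("piece: %u undefined bits\n", SR.Offset - CurPos);
    std::printf("piece: subreg at bit %u, %u bits\n", SR.Offset, SR.Size);
    std::fill(Coverage.begin() + SR.Offset,
              Coverage.begin() + SR.Offset + SR.Size, true);
    CurPos = SR.Offset + SR.Size;
  }
  if (CurPos == 0)
    std::printf("no DWARF encoding found\n"); // addMachineReg returns false
  else if (CurPos < RegSize) // trailing gap, as in the new code above
    std::printf("piece: %u trailing undefined bits\n", RegSize - CurPos);
  return 0;
}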
+  if (CurPos < RegSize) +    DwarfRegs.push_back({-1, RegSize - CurPos, "no DWARF register encoding"}); +  return true;  }  void DwarfExpression::addStackValue() { @@ -341,11 +349,22 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,      case dwarf::DW_OP_plus:      case dwarf::DW_OP_minus:      case dwarf::DW_OP_mul: +    case dwarf::DW_OP_div: +    case dwarf::DW_OP_mod: +    case dwarf::DW_OP_or: +    case dwarf::DW_OP_and: +    case dwarf::DW_OP_xor: +    case dwarf::DW_OP_shl: +    case dwarf::DW_OP_shr: +    case dwarf::DW_OP_shra: +    case dwarf::DW_OP_lit0: +    case dwarf::DW_OP_not: +    case dwarf::DW_OP_dup:        emitOp(Op->getOp());        break;      case dwarf::DW_OP_deref:        assert(LocationKind != Register); -      if (LocationKind != Memory && isMemoryLocation(ExprCursor)) +      if (LocationKind != Memory && ::isMemoryLocation(ExprCursor))          // Turning this into a memory location description makes the deref          // implicit.          LocationKind = Memory; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index ea5cbc40ba35..952b0d99a95a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -211,6 +211,9 @@ public:    /// Emit an unsigned constant.    void addUnsignedConstant(const APInt &Value); +  bool isMemoryLocation() const { return LocationKind == Memory; } +  bool isUnknownLocation() const { return LocationKind == Unknown; } +    /// Lock this down to become a memory location description.    void setMemoryLocationKind() {      assert(LocationKind == Unknown); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 3c04c969192d..c90bd568162d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -77,42 +77,24 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {  void DwarfFile::emitAbbrevs(MCSection *Section) { Abbrevs.Emit(Asm, Section); }  // Emit strings into a string section. -void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) { -  StrPool.emit(*Asm, StrSection, OffsetSection); +void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection, +                            bool UseRelativeOffsets) { +  StrPool.emit(*Asm, StrSection, OffsetSection, UseRelativeOffsets);  }  bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { -  SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; +  auto &ScopeVars = ScopeVariables[LS];    const DILocalVariable *DV = Var->getVariable(); -  // Variables with positive arg numbers are parameters.    if (unsigned ArgNum = DV->getArg()) { -    // Keep all parameters in order at the start of the variable list to ensure -    // function types are correct (no out-of-order parameters) -    // -    // This could be improved by only doing it for optimized builds (unoptimized -    // builds have the right order to begin with), searching from the back (this -    // would catch the unoptimized case quickly), or doing a binary search -    // rather than linear search. -    auto I = Vars.begin(); -    while (I != Vars.end()) { -      unsigned CurNum = (*I)->getVariable()->getArg(); -      // A local (non-parameter) variable has been found, insert immediately -      // before it. 
-      if (CurNum == 0) -        break; -      // A later indexed parameter has been found, insert immediately before it. -      if (CurNum > ArgNum) -        break; -      if (CurNum == ArgNum) { -        (*I)->addMMIEntry(*Var); -        return false; -      } -      ++I; +    auto Cached = ScopeVars.Args.find(ArgNum); +    if (Cached == ScopeVars.Args.end()) +      ScopeVars.Args[ArgNum] = Var; +    else { +      Cached->second->addMMIEntry(*Var); +      return false;      } -    Vars.insert(I, Var); -    return true; -  } - -  Vars.push_back(Var); +  } else { +    ScopeVars.Locals.push_back(Var); +  }        return true;  } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index 167ca13c19c1..8dfbc4e1c434 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -17,6 +17,7 @@  #include "llvm/CodeGen/DIE.h"  #include "llvm/IR/Metadata.h"  #include "llvm/Support/Allocator.h" +#include <map>  #include <memory>  #include <utility> @@ -43,8 +44,23 @@ class DwarfFile {    DwarfStringPool StrPool; -  // Collection of dbg variables of a scope. -  DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> ScopeVariables; +  /// DWARF v5: The symbol that designates the start of the contribution to +  /// the string offsets table. The contribution is shared by all units. +  MCSymbol *StringOffsetsStartSym = nullptr; + +  /// DWARF v5: The symbol that designates the base of the range list table. +  /// The table is shared by all units. +  MCSymbol *RnglistsTableBaseSym = nullptr; + +  /// The variables of a lexical scope. +  struct ScopeVars { +    /// We need to sort Args by ArgNo and check for duplicates. This could also +    /// be implemented as a list or vector + std::lower_bound(). +    std::map<unsigned, DbgVariable *> Args; +    SmallVector<DbgVariable *, 8> Locals; +  }; +  /// Collection of DbgVariables of each lexical scope. +  DenseMap<LexicalScope *, ScopeVars> ScopeVariables;    // Collection of abstract subprogram DIEs.    DenseMap<const MDNode *, DIE *> AbstractSPDies; @@ -62,39 +78,51 @@ public:      return CUs;    } -  /// \brief Compute the size and offset of a DIE given an incoming Offset. +  /// Compute the size and offset of a DIE given an incoming Offset.    unsigned computeSizeAndOffset(DIE &Die, unsigned Offset); -  /// \brief Compute the size and offset of all the DIEs. +  /// Compute the size and offset of all the DIEs.    void computeSizeAndOffsets(); -  /// \brief Compute the size and offset of all the DIEs in the given unit. +  /// Compute the size and offset of all the DIEs in the given unit.    /// \returns The size of the root DIE.    unsigned computeSizeAndOffsetsForUnit(DwarfUnit *TheU); -  /// \brief Add a unit to the list of CUs. +  /// Add a unit to the list of CUs.    void addUnit(std::unique_ptr<DwarfCompileUnit> U); -  /// \brief Emit all of the units to the section listed with the given +  /// Emit all of the units to the section listed with the given    /// abbreviation section.    void emitUnits(bool UseOffsets); -  /// \brief Emit the given unit to its section. +  /// Emit the given unit to its section.    void emitUnit(DwarfUnit *U, bool UseOffsets); -  /// \brief Emit a set of abbreviations to the specific section. +  /// Emit a set of abbreviations to the specific section.    void emitAbbrevs(MCSection *); -  /// \brief Emit all of the strings to the section given. 
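Back in addScopeVariable above: the std::map keyed by argument number replaces the old linear scan, keeping parameters sorted by ArgNo for free and turning the duplicate-parameter merge into a single lookup. A toy version of the new logic (DbgVariable shrunk to a name for the sketch):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Var {
  unsigned ArgNo; // 0 means a local variable, >0 a parameter
  std::string Name;
};

struct ScopeVars {
  std::map<unsigned, Var *> Args; // parameters, sorted by argument number
  std::vector<Var *> Locals;      // everything else, in insertion order
};

// Returns false when the variable merges into an existing parameter entry.
static bool addScopeVariable(ScopeVars &SV, Var *V) {
  if (unsigned ArgNum = V->ArgNo) {
    auto Cached = SV.Args.find(ArgNum);
    if (Cached == SV.Args.end())
      SV.Args[ArgNum] = V;
    else
      return false; // duplicate ArgNo: merge (addMMIEntry in the real code)
  } else {
    SV.Locals.push_back(V);
  }
  return true;
}

int main() {
  ScopeVars SV;
  Var B{2, "b"}, A{1, "a"}, T{0, "tmp"}, Dup{2, "b-again"};
  for (Var *V : {&B, &A, &T, &Dup})
    addScopeVariable(SV, V);
  // Map iteration restores parameter order regardless of insertion order.
  for (const auto &P : SV.Args)
    std::printf("arg %u: %s\n", P.first, P.second->Name.c_str());
  return 0;
}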
-  void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr); +  /// Emit all of the strings to the section given. If OffsetSection is +  /// non-null, emit a table of string offsets to it. If UseRelativeOffsets +  /// is false, emit absolute offsets to the strings. Otherwise, emit +  /// relocatable references to the strings if they are supported by the target. +  void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr, +                   bool UseRelativeOffsets = false); -  /// \brief Returns the string pool. +  /// Returns the string pool.    DwarfStringPool &getStringPool() { return StrPool; } +  MCSymbol *getStringOffsetsStartSym() const { return StringOffsetsStartSym; } + +  void setStringOffsetsStartSym(MCSymbol *Sym) { StringOffsetsStartSym = Sym; } + +  MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; } + +  void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; } +    /// \returns false if the variable was merged with a previous one.    bool addScopeVariable(LexicalScope *LS, DbgVariable *Var); -  DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() { +  DenseMap<LexicalScope *, ScopeVars> &getScopeVariables() {      return ScopeVariables;    } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index aa5f01e88933..a61fa83cfb03 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -39,8 +39,30 @@ DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,    return EntryRef(*I.first);  } +void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm, +                                                   MCSection *Section, +                                                   MCSymbol *StartSym) { +  if (empty()) +    return; +  Asm.OutStreamer->SwitchSection(Section); +  unsigned EntrySize = 4; +  // FIXME: DWARF64 +  // We are emitting the header for a contribution to the string offsets +  // table. The header consists of an entry with the contribution's +  // size (not including the size of the length field), the DWARF version and +  // 2 bytes of padding. +  Asm.emitInt32(size() * EntrySize + 4); +  Asm.emitInt16(Asm.getDwarfVersion()); +  Asm.emitInt16(0); +  // Define the symbol that marks the start of the contribution. It is +  // referenced by most unit headers via DW_AT_str_offsets_base. +  // Split units do not use the attribute. +  if (StartSym) +    Asm.OutStreamer->EmitLabel(StartSym); +} +  void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, -                           MCSection *OffsetSection) { +                           MCSection *OffsetSection, bool UseRelativeOffsets) {    if (Pool.empty())      return; @@ -74,6 +96,9 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,      Asm.OutStreamer->SwitchSection(OffsetSection);      unsigned size = 4; // FIXME: DWARF64 is 8.      
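Before the offset loop below, the contribution header that emitStringOffsetsTableHeader writes above is small enough to verify by hand: a 4-byte length covering everything after the length field (the 2-byte version plus 2 bytes of padding, then one 4-byte offset per string), a 2-byte DWARF version, and the zero padding. The arithmetic, spelled out for a hypothetical 25-string pool (32-bit DWARF, per the FIXME above):

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned EntrySize = 4;   // 32-bit DWARF string offsets
  const unsigned NumEntries = 25; // hypothetical pool size
  // size() * EntrySize + 4, exactly as in the emitter above.
  uint32_t Length = NumEntries * EntrySize + 4; // 104
  uint16_t Version = 5;
  uint16_t Padding = 0;
  std::printf("unit length %u, version %u, padding %u\n",
              Length, Version, Padding);
  // DW_AT_str_offsets_base then points just past these 8 header bytes,
  // at the first entry of the offset array.
  return 0;
}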
for (const auto &Entry : Entries) -      Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size); +      if (UseRelativeOffsets) +        Asm.emitDwarfStringOffset(Entry->getValue()); +      else +        Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size);    }  } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h index 1cac3b7c8432..6e6988ea4ad4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -19,6 +19,7 @@ namespace llvm {  class AsmPrinter;  class MCSection; +class MCSymbol;  // Collection of strings for this unit and assorted symbols.  // A String->Symbol mapping of strings used by indirect @@ -36,11 +37,17 @@ public:    DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix); +  void emitStringOffsetsTableHeader(AsmPrinter &Asm, MCSection *OffsetSection, +                                    MCSymbol *StartSym); +    void emit(AsmPrinter &Asm, MCSection *StrSection, -            MCSection *OffsetSection = nullptr); +            MCSection *OffsetSection = nullptr, +            bool UseRelativeOffsets = false);    bool empty() const { return Pool.empty(); } +  unsigned size() const { return Pool.size(); } +    /// Get a reference to an entry in the string pool.    EntryRef getEntry(AsmPrinter &Asm, StringRef Str);  }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 4ea59f504bd4..43b835b2c4aa 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -19,10 +19,10 @@  #include "llvm/ADT/APFloat.h"  #include "llvm/ADT/APInt.h"  #include "llvm/ADT/None.h" +#include "llvm/ADT/StringExtras.h"  #include "llvm/ADT/iterator_range.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/IR/Constants.h" @@ -30,12 +30,14 @@  #include "llvm/IR/GlobalValue.h"  #include "llvm/IR/Metadata.h"  #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h"  #include "llvm/MC/MCDwarf.h"  #include "llvm/MC/MCSection.h"  #include "llvm/MC/MCStreamer.h"  #include "llvm/MC/MachineLocation.h"  #include "llvm/Support/Casting.h"  #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include <cassert>  #include <cstdint>  #include <string> @@ -45,11 +47,6 @@ using namespace llvm;  #define DEBUG_TYPE "dwarfdebug" -static cl::opt<bool> -GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, -                       cl::desc("Generate DWARF4 type units."), -                       cl::init(false)); -  DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,                                         DIELoc &DIE)      : DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU), @@ -83,8 +80,6 @@ DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,                               MCDwarfDwoLineTable *SplitLineTable)      : DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU),        SplitLineTable(SplitLineTable) { -  if (SplitLineTable) -    addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);  }  DwarfUnit::~DwarfUnit() { @@ -185,7 +180,7 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {      return false;    
return (isa<DIType>(D) ||            (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) && -         !GenerateDwarfTypeUnits; +         !DD->generateTypeUnits();  }  DIE *DwarfUnit::getDIE(const DINode *D) const { @@ -239,9 +234,28 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,  void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,                            StringRef String) { -  Die.addValue(DIEValueAllocator, Attribute, -               isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp, -               DIEString(DU->getStringPool().getEntry(*Asm, String))); +  if (DD->useInlineStrings()) { +    Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_string, +                 new (DIEValueAllocator) +                     DIEInlineString(String, DIEValueAllocator)); +    return; +  } +  auto StringPoolEntry = DU->getStringPool().getEntry(*Asm, String); +  dwarf::Form IxForm = +      isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp; +  // For DWARF v5 and beyond, use the smallest strx? form possible. +  if (useSegmentedStringOffsetsTable()) { +    IxForm = dwarf::DW_FORM_strx1; +    unsigned Index = StringPoolEntry.getIndex(); +    if (Index > 0xffffff) +      IxForm = dwarf::DW_FORM_strx4; +    else if (Index > 0xffff) +      IxForm = dwarf::DW_FORM_strx3; +    else if (Index > 0xff) +      IxForm = dwarf::DW_FORM_strx2; +  } +  Die.addValue(DIEValueAllocator, Attribute, IxForm, +               DIEString(StringPoolEntry));  }  DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die, @@ -263,9 +277,33 @@ void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,      addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);  } -unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { -  return SplitLineTable ? SplitLineTable->getFile(DirName, FileName) -                        : getCU().getOrCreateSourceID(FileName, DirName); +MD5::MD5Result *DwarfUnit::getMD5AsBytes(const DIFile *File) const { +  assert(File); +  if (DD->getDwarfVersion() < 5) +    return nullptr; +  Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum(); +  if (!Checksum || Checksum->Kind != DIFile::CSK_MD5) +    return nullptr; + +  // Convert the string checksum to an MD5Result for the streamer. +  // The verifier validates the checksum so we assume it's okay. +  // An MD5 checksum is 16 bytes. +  std::string ChecksumString = fromHex(Checksum->Value); +  void *CKMem = Asm->OutStreamer->getContext().allocate(16, 1); +  memcpy(CKMem, ChecksumString.data(), 16); +  return reinterpret_cast<MD5::MD5Result *>(CKMem); +} + +unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) { +  if (!SplitLineTable) +    return getCU().getOrCreateSourceID(File); +  if (!UsedLineTable) { +    UsedLineTable = true; +    // This is a split type unit that needs a line table. 
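Returning to addString above: the form-selection ladder picks the narrowest DW_FORM_strx variant that can still hold the string pool index, one byte up to 0xff, two up to 0xffff, three up to 0xffffff, and four beyond. The same ladder in isolation (the enum names are local stand-ins for the dwarf::DW_FORM_strxN constants):

#include <cstdint>
#include <cstdio>

enum class StrxForm { Strx1 = 1, Strx2, Strx3, Strx4 };

// Smallest DW_FORM_strxN that can represent Index.
static StrxForm smallestStrxForm(uint32_t Index) {
  if (Index > 0xffffff)
    return StrxForm::Strx4;
  if (Index > 0xffff)
    return StrxForm::Strx3;
  if (Index > 0xff)
    return StrxForm::Strx2;
  return StrxForm::Strx1; // indices 0..255 fit in a single byte
}

int main() {
  for (uint32_t I : {0u, 255u, 256u, 65535u, 65536u, 16777216u})
    std::printf("index %8u -> DW_FORM_strx%d\n", I,
                static_cast<int>(smallestStrxForm(I)));
  return 0;
}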
+    addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0); +  } +  return SplitLineTable->getFile(File->getDirectory(), File->getFilename(), +                                 getMD5AsBytes(File), File->getSource());  }  void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { @@ -335,12 +373,11 @@ void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,    Die.addValue(DIEValueAllocator, Attribute, Block->BestForm(), Block);  } -void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, -                              StringRef Directory) { +void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) {    if (Line == 0)      return; -  unsigned FileID = getOrCreateSourceID(File, Directory); +  unsigned FileID = getOrCreateSourceID(File);    assert(FileID && "Invalid file id");    addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);    addUInt(Die, dwarf::DW_AT_decl_line, None, Line); @@ -349,32 +386,31 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File,  void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) {    assert(V); -  addSourceLine(Die, V->getLine(), V->getScope()->getFilename(), -                V->getScope()->getDirectory()); +  addSourceLine(Die, V->getLine(), V->getFile());  }  void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) {    assert(G); -  addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory()); +  addSourceLine(Die, G->getLine(), G->getFile());  }  void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) {    assert(SP); -  addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory()); +  addSourceLine(Die, SP->getLine(), SP->getFile());  }  void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) {    assert(Ty); -  addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); +  addSourceLine(Die, Ty->getLine(), Ty->getFile());  }  void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {    assert(Ty); -  addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); +  addSourceLine(Die, Ty->getLine(), Ty->getFile());  }  /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -727,7 +763,7 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {    else if (auto *STy = dyn_cast<DISubroutineType>(Ty))      constructTypeDIE(TyDIE, STy);    else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { -    if (GenerateDwarfTypeUnits && !Ty->isForwardDecl()) +    if (DD->generateTypeUnits() && !Ty->isForwardDecl())        if (MDString *TypeId = CTy->getRawIdentifier()) {          DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);          // Skip updating the accelerator tables since this is not the full type. @@ -917,9 +953,24 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {    case dwarf::DW_TAG_enumeration_type:      constructEnumTypeDIE(Buffer, CTy);      break; +  case dwarf::DW_TAG_variant_part:    case dwarf::DW_TAG_structure_type:    case dwarf::DW_TAG_union_type:    case dwarf::DW_TAG_class_type: { +    // Emit the discriminator for a variant part. +    DIDerivedType *Discriminator = nullptr; +    if (Tag == dwarf::DW_TAG_variant_part) { +      Discriminator = CTy->getDiscriminator(); +      if (Discriminator) { +        // DWARF says: +        //    If the variant part has a discriminant, the discriminant is +        //    represented by a separate debugging information entry which is +        //    a child of the variant part entry. 
+        DIE &DiscMember = constructMemberDIE(Buffer, Discriminator); +        addDIEEntry(Buffer, dwarf::DW_AT_discr, DiscMember); +      } +    } +      // Add elements to structure type.      DINodeArray Elements = CTy->getElements();      for (const auto *Element : Elements) { @@ -933,6 +984,18 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {            addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend);          } else if (DDTy->isStaticMember()) {            getOrCreateStaticMemberDIE(DDTy); +        } else if (Tag == dwarf::DW_TAG_variant_part) { +          // When emitting a variant part, wrap each member in +          // DW_TAG_variant. +          DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer); +          if (const ConstantInt *CI = +              dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) { +            if (isUnsignedDIType(DD, resolve(Discriminator->getBaseType()))) +              addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue()); +            else +              addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue()); +          } +          constructMemberDIE(Variant, DDTy);          } else {            constructMemberDIE(Buffer, DDTy);          } @@ -952,6 +1015,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {          if (unsigned PropertyAttributes = Property->getAttributes())            addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None,                    PropertyAttributes); +      } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) { +        if (Composite->getTag() == dwarf::DW_TAG_variant_part) { +          DIE &VariantPart = createAndAddDIE(Composite->getTag(), Buffer); +          constructTypeDIE(VariantPart, Composite); +        }        }      } @@ -975,6 +1043,15 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {          Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)        addTemplateParams(Buffer, CTy->getTemplateParams()); +    // Add the type's non-standard calling convention. +    uint8_t CC = 0; +    if (CTy->isTypePassByValue()) +      CC = dwarf::DW_CC_pass_by_value; +    else if (CTy->isTypePassByReference()) +      CC = dwarf::DW_CC_pass_by_reference; +    if (CC) +      addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, +              CC);      break;    }    default: @@ -1152,9 +1229,8 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,      // Look at the Decl's linkage name only if we emitted it.      if (DD->useAllLinkageNames())        DeclLinkageName = SPDecl->getLinkageName(); -    unsigned DeclID = -        getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory()); -    unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory()); +    unsigned DeclID = getOrCreateSourceID(SPDecl->getFile()); +    unsigned DefID = getOrCreateSourceID(SP->getFile());      if (DeclID != DefID)        addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID); @@ -1304,14 +1380,17 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,    // DW_AT_lower_bound and DW_AT_count attributes.    
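Before the subrange emission continues below, a note on the variant-part support above: it exists to describe discriminated unions (Rust enums being the motivating case). The C++ analogue of the shape being described, with comments mapping each piece to its DWARF tag (an illustrative layout, not generated code):

#include <cstdint>
#include <cstdio>

struct Circle { double Radius; };
struct Rect { double W, H; };

// A hand-written discriminated union. The variant-part support above would
// describe a type like this with DW_TAG_variant_part: Tag becomes the
// discriminant member referenced by DW_AT_discr, and each union member
// becomes a DW_TAG_variant selected by its DW_AT_discr_value.
struct Shape {
  uint8_t Tag; // discriminant: 0 = Circle, 1 = Rect
  union {
    Circle C; // variant for discriminant value 0
    Rect R;   // variant for discriminant value 1
  };
};

int main() {
  Shape S;
  S.Tag = 0;
  S.C.Radius = 2.0;
  if (S.Tag == 0) // a debugger consults the discriminant the same way
    std::printf("circle, r=%f\n", S.C.Radius);
  return 0;
}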
int64_t LowerBound = SR->getLowerBound();    int64_t DefaultLowerBound = getDefaultLowerBound(); -  int64_t Count = SR->getCount(); +  int64_t Count = -1; +  if (auto *CI = SR->getCount().dyn_cast<ConstantInt*>()) +    Count = CI->getSExtValue();    if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)      addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); -  if (Count != -1) -    // FIXME: An unbounded array should reference the expression that defines -    // the array. +  if (auto *CV = SR->getCount().dyn_cast<DIVariable*>()) { +    if (auto *CountVarDIE = getDIE(CV)) +      addDIEEntry(DW_Subrange, dwarf::DW_AT_count, *CountVarDIE); +  } else if (Count != -1)      addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count);  } @@ -1320,16 +1399,49 @@ DIE *DwarfUnit::getIndexTyDie() {      return IndexTyDie;    // Construct an integer type to use for indexes.    IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie()); -  addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype"); +  StringRef Name = "__ARRAY_SIZE_TYPE__"; +  addString(*IndexTyDie, dwarf::DW_AT_name, Name);    addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));    addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,            dwarf::DW_ATE_unsigned); +  DD->addAccelType(Name, *IndexTyDie, /*Flags*/ 0);    return IndexTyDie;  } +/// Returns true if the vector's size differs from the sum of sizes of elements +/// the user specified.  This can occur if the vector has been rounded up to +/// fit memory alignment constraints. +static bool hasVectorBeenPadded(const DICompositeType *CTy) { +  assert(CTy && CTy->isVector() && "Composite type is not a vector"); +  const uint64_t ActualSize = CTy->getSizeInBits(); + +  // Obtain the size of each element in the vector. +  DIType *BaseTy = CTy->getBaseType().resolve(); +  assert(BaseTy && "Unknown vector element type."); +  const uint64_t ElementSize = BaseTy->getSizeInBits(); + +  // Locate the number of elements in the vector. +  const DINodeArray Elements = CTy->getElements(); +  assert(Elements.size() == 1 && +         Elements[0]->getTag() == dwarf::DW_TAG_subrange_type && +         "Invalid vector element array, expected one element of type subrange"); +  const auto Subrange = cast<DISubrange>(Elements[0]); +  const auto CI = Subrange->getCount().get<ConstantInt *>(); +  const int32_t NumVecElements = CI->getSExtValue(); + +  // Ensure we found the element count and that the actual size is wide +  // enough to contain the requested size. +  assert(ActualSize >= (NumVecElements * ElementSize) && "Invalid vector size"); +  return ActualSize != (NumVecElements * ElementSize); +} +  void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { -  if (CTy->isVector()) +  if (CTy->isVector()) {      addFlag(Buffer, dwarf::DW_AT_GNU_vector); +    if (hasVectorBeenPadded(CTy)) +      addUInt(Buffer, dwarf::DW_AT_byte_size, None, +              CTy->getSizeInBits() / CHAR_BIT); +  }    // Emit the element type.    
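A note on hasVectorBeenPadded above: it reduces to one comparison, the composite type's recorded size against element count times element size. The canonical case is a three-element float vector that the target rounds up to a full 128-bit slot; the array DIE then gets an explicit DW_AT_byte_size. The arithmetic, spelled out with those illustrative numbers:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // <3 x float>: 3 elements of 32 bits = 96 bits of payload, but the
  // vector itself is allocated as 128 bits on many targets.
  const uint64_t ActualSize = 128; // DICompositeType::getSizeInBits()
  const uint64_t ElementSize = 32; // base type size in bits
  const int64_t NumElements = 3;   // from the DISubrange count
  assert(ActualSize >= uint64_t(NumElements) * ElementSize &&
         "Invalid vector size");
  if (ActualSize != uint64_t(NumElements) * ElementSize) // 128 != 96: padded
    std::printf("padded: emit DW_AT_byte_size = %llu\n",
                (unsigned long long)(ActualSize / 8)); // 16 bytes
  return 0;
}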
addType(Buffer, resolve(CTy->getBaseType())); @@ -1350,6 +1462,15 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {  }  void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { +  const DIType *DTy = resolve(CTy->getBaseType()); +  bool IsUnsigned = DTy && isUnsignedDIType(DD, DTy); +  if (DTy) { +    if (DD->getDwarfVersion() >= 3) +      addType(Buffer, DTy); +    if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagFixedEnum)) +      addFlag(Buffer, dwarf::DW_AT_enum_class); +  } +    DINodeArray Elements = CTy->getElements();    // Add enumerators to enumeration type. @@ -1359,16 +1480,10 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {        DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);        StringRef Name = Enum->getName();        addString(Enumerator, dwarf::DW_AT_name, Name); -      int64_t Value = Enum->getValue(); -      addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, -              Value); +      auto Value = static_cast<uint64_t>(Enum->getValue()); +      addConstantValue(Enumerator, IsUnsigned, Value);      }    } -  const DIType *DTy = resolve(CTy->getBaseType()); -  if (DTy) { -    addType(Buffer, DTy); -    addFlag(Buffer, dwarf::DW_AT_enum_class); -  }  }  void DwarfUnit::constructContainingTypeDIEs() { @@ -1385,7 +1500,7 @@ void DwarfUnit::constructContainingTypeDIEs() {    }  } -void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { +DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {    DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer);    StringRef Name = DT->getName();    if (!Name.empty()) @@ -1490,6 +1605,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {    if (DT->isArtificial())      addFlag(MemberDie, dwarf::DW_AT_artificial); + +  return MemberDie;  }  DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { @@ -1542,18 +1659,18 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {  void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {    // Emit size of content not including length itself    Asm->OutStreamer->AddComment("Length of Unit"); -  Asm->EmitInt32(getHeaderSize() + getUnitDie().getSize()); +  Asm->emitInt32(getHeaderSize() + getUnitDie().getSize());    Asm->OutStreamer->AddComment("DWARF version number");    unsigned Version = DD->getDwarfVersion(); -  Asm->EmitInt16(Version); +  Asm->emitInt16(Version);    // DWARF v5 reorders the address size and adds a unit type.    if (Version >= 5) {      Asm->OutStreamer->AddComment("DWARF Unit Type"); -    Asm->EmitInt8(UT); +    Asm->emitInt8(UT);      Asm->OutStreamer->AddComment("Address Size (in bytes)"); -    Asm->EmitInt8(Asm->MAI->getCodePointerSize()); +    Asm->emitInt8(Asm->MAI->getCodePointerSize());    }    // We share one abbreviations table across all units so it's always at the @@ -1562,14 +1679,14 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {    Asm->OutStreamer->AddComment("Offset Into Abbrev. 
Section");    const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();    if (UseOffsets) -    Asm->EmitInt32(0); +    Asm->emitInt32(0);    else      Asm->emitDwarfSymbolReference(          TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false);    if (Version <= 4) {      Asm->OutStreamer->AddComment("Address Size (in bytes)"); -    Asm->EmitInt8(Asm->MAI->getCodePointerSize()); +    Asm->emitInt8(Asm->MAI->getCodePointerSize());    }  } @@ -1628,3 +1745,19 @@ const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const {      return nullptr;    return getSection()->getBeginSymbol();  } + +void DwarfUnit::addStringOffsetsStart() { +  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); +  addSectionLabel(getUnitDie(), dwarf::DW_AT_str_offsets_base, +                  DU->getStringOffsetsStartSym(), +                  TLOF.getDwarfStrOffSection()->getBeginSymbol()); +} + +void DwarfUnit::addRnglistsBase() { +  assert(DD->getDwarfVersion() >= 5 && +         "DW_AT_rnglists_base requires DWARF version 5 or later"); +  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); +  addSectionLabel(getUnitDie(), dwarf::DW_AT_rnglists_base, +                  DU->getRnglistsTableBaseSym(), +                  TLOF.getDwarfRnglistsSection()->getBeginSymbol()); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 4cc01b3298d4..69696f626536 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -98,7 +98,7 @@ protected:    /// corresponds to the MDNode mapped with the subprogram DIE.    DenseMap<DIE *, const DINode *> ContainingTypeMap; -  DwarfUnit(dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW, +  DwarfUnit(dwarf::Tag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW,              DwarfFile *DWU);    bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); @@ -112,6 +112,8 @@ public:    uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }    const DICompileUnit *getCUNode() const { return CUNode; } +  uint16_t getDwarfVersion() const { return DD->getDwarfVersion(); } +    /// Return true if this compile unit has something to write out.    bool hasContent() const { return getUnitDie().hasChildren(); } @@ -185,7 +187,7 @@ public:    /// Add a dwarf op address data and value using the form given and an    /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index. -  void addOpAddress(DIELoc &Die, const MCSymbol *Label); +  void addOpAddress(DIELoc &Die, const MCSymbol *Sym);    /// Add a label delta attribute data and value.    void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, @@ -201,14 +203,13 @@ public:    void addDIETypeSignature(DIE &Die, uint64_t Signature);    /// Add block data. -  void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block); +  void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc);    /// Add block data.    void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block);    /// Add location information to specified debug information entry. 
-  void addSourceLine(DIE &Die, unsigned Line, StringRef File, -                     StringRef Directory); +  void addSourceLine(DIE &Die, unsigned Line, const DIFile *File);    void addSourceLine(DIE &Die, const DILocalVariable *V);    void addSourceLine(DIE &Die, const DIGlobalVariable *G);    void addSourceLine(DIE &Die, const DISubprogram *SP); @@ -259,7 +260,7 @@ public:                                   bool SkipSPAttributes = false);    /// Find existing DIE or create new DIE for the given type. -  DIE *getOrCreateTypeDIE(const MDNode *N); +  DIE *getOrCreateTypeDIE(const MDNode *TyNode);    /// Get context owner's DIE.    DIE *getOrCreateContextDIE(const DIScope *Context); @@ -274,6 +275,10 @@ public:    /// call insertDIE if MD is not null.    DIE &createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N = nullptr); +  bool useSegmentedStringOffsetsTable() const { +    return DD->useSegmentedStringOffsetsTable(); +  } +    /// Compute the size of a header for this unit, not including the initial    /// length field.    virtual unsigned getHeaderSize() const { @@ -287,6 +292,12 @@ public:    /// Emit the header for this unit, not including the initial length field.    virtual void emitHeader(bool UseOffsets) = 0; +  /// Add the DW_AT_str_offsets_base attribute to the unit DIE. +  void addStringOffsetsStart(); + +  /// Add the DW_AT_rnglists_base attribute to the unit DIE. +  void addRnglistsBase(); +    virtual DwarfCompileUnit &getCU() = 0;    void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); @@ -300,15 +311,19 @@ public:                                        const MCSymbol *Label,                                        const MCSymbol *Sec); +  /// If the \p File has an MD5 checksum, return it as an MD5Result +  /// allocated in the MCContext. +  MD5::MD5Result *getMD5AsBytes(const DIFile *File) const; +  protected:    ~DwarfUnit();    /// Create new static data member DIE.    DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); -  /// Look up the source ID with the given directory and source file names. If -  /// none currently exists, create a new ID and insert it in the line table. -  virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; +  /// Look up the source ID for the given file. If none currently exists, +  /// create a new ID and insert it in the line table. +  virtual unsigned getOrCreateSourceID(const DIFile *File) = 0;    /// Look in the DwarfDebug map for the MDNode that corresponds to the    /// reference. 
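A note on the emitCommonHeader() change above: DWARF v5 adds a unit type octet after the version field and emits the address size before the abbreviation table offset, which is why the emitInt8 calls are reordered under the Version >= 5 check. A minimal standalone sketch of the two layouts for 32-bit DWARF (the byte-vector writer is an illustrative stand-in, not the MCStreamer API):

  #include <cstdint>
  #include <vector>

  // Append V little-endian, N bytes wide (stand-in for the emitIntN calls).
  template <unsigned N>
  static void emitInt(std::vector<uint8_t> &Out, uint64_t V) {
    for (unsigned I = 0; I != N; ++I)
      Out.push_back(uint8_t(V >> (8 * I)));
  }

  // Simplified compile-unit header; only the field order is of interest.
  static std::vector<uint8_t> unitHeader(uint16_t Version, uint32_t Length,
                                         uint32_t AbbrevOff, uint8_t AddrSize,
                                         uint8_t UnitType) { // e.g. DW_UT_compile
    std::vector<uint8_t> Out;
    emitInt<4>(Out, Length);      // unit_length, not counting this field
    emitInt<2>(Out, Version);     // version
    if (Version >= 5) {
      emitInt<1>(Out, UnitType);  // unit_type, new in DWARF v5
      emitInt<1>(Out, AddrSize);  // address_size, moved before the offset
      emitInt<4>(Out, AbbrevOff); // debug_abbrev_offset
    } else {
      emitInt<4>(Out, AbbrevOff); // debug_abbrev_offset
      emitInt<1>(Out, AddrSize);  // address_size, last in DWARF v2 through v4
    }
    return Out;
  }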
@@ -327,11 +342,11 @@ protected:  private:    void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy);    void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy); -  void constructTypeDIE(DIE &Buffer, const DISubroutineType *DTy); +  void constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy);    void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy);    void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy);    void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy); -  void constructMemberDIE(DIE &Buffer, const DIDerivedType *DT); +  DIE &constructMemberDIE(DIE &Buffer, const DIDerivedType *DT);    void constructTemplateTypeParameterDIE(DIE &Buffer,                                           const DITemplateTypeParameter *TP);    void constructTemplateValueParameterDIE(DIE &Buffer, @@ -357,8 +372,9 @@ class DwarfTypeUnit final : public DwarfUnit {    const DIE *Ty;    DwarfCompileUnit &CU;    MCDwarfDwoLineTable *SplitLineTable; +  bool UsedLineTable = false; -  unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override; +  unsigned getOrCreateSourceID(const DIFile *File) override;    bool isDwoUnit() const override;  public: diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 3cdab57bca70..65de9d7e65a4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -20,7 +20,6 @@  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/Function.h"  #include "llvm/MC/MCAsmInfo.h" @@ -30,6 +29,7 @@  #include "llvm/MC/MCTargetOptions.h"  #include "llvm/Support/Casting.h"  #include "llvm/Support/LEB128.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include <algorithm>  #include <cassert>  #include <cstdint> @@ -58,10 +58,10 @@ unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L,  /// Compute the actions table and gather the first action index for each landing  /// pad site. -unsigned EHStreamer:: -computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, -                    SmallVectorImpl<ActionEntry> &Actions, -                    SmallVectorImpl<unsigned> &FirstActions) { +void EHStreamer::computeActionsTable( +    const SmallVectorImpl<const LandingPadInfo *> &LandingPads, +    SmallVectorImpl<ActionEntry> &Actions, +    SmallVectorImpl<unsigned> &FirstActions) {    // The action table follows the call-site table in the LSDA. The individual    // records are of two types:    // @@ -149,7 +149,7 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,        FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;      } // else identical - re-use previous FirstAction -    // Information used when created the call-site table. The action record +    // Information used when creating the call-site table. The action record      // field of the call site record is the offset of the first associated      // action record, relative to the start of the actions table. 
This value is      // biased by 1 (1 indicating the start of the actions table), and 0 @@ -161,8 +161,6 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,      PrevLPI = LPI;    } - -  return SizeActions;  }  /// Return `true' if this is a call to a function marked `nounwind'. Return @@ -361,55 +359,33 @@ void EHStreamer::emitExceptionTable() {      LandingPads.push_back(&PadInfos[i]);    // Order landing pads lexicographically by type id. -  std::sort(LandingPads.begin(), LandingPads.end(), -            [](const LandingPadInfo *L, -               const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; }); +  llvm::sort(LandingPads.begin(), LandingPads.end(), +             [](const LandingPadInfo *L, +                const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; });    // Compute the actions table and gather the first action index for each    // landing pad site.    SmallVector<ActionEntry, 32> Actions;    SmallVector<unsigned, 64> FirstActions; -  unsigned SizeActions = -    computeActionsTable(LandingPads, Actions, FirstActions); +  computeActionsTable(LandingPads, Actions, FirstActions);    // Compute the call-site table.    SmallVector<CallSiteEntry, 64> CallSites;    computeCallSiteTable(CallSites, LandingPads, FirstActions); -  // Final tallies. - -  // Call sites.    bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; -  bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true; - -  unsigned CallSiteTableLength; -  if (IsSJLJ) -    CallSiteTableLength = 0; -  else { -    unsigned SiteStartSize  = 4; // dwarf::DW_EH_PE_udata4 -    unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4 -    unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4 -    CallSiteTableLength = -      CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize); -  } - -  for (unsigned i = 0, e = CallSites.size(); i < e; ++i) { -    CallSiteTableLength += getULEB128Size(CallSites[i].Action); -    if (IsSJLJ) -      CallSiteTableLength += getULEB128Size(i); -  } +  unsigned CallSiteEncoding = +      IsSJLJ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128; +  bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();    // Type infos.    MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();    unsigned TTypeEncoding; -  unsigned TypeFormatSize;    if (!HaveTTData) { -    // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say -    // that we're omitting that bit. +    // If there is no TypeInfo, then we just explicitly say that we're omitting +    // that bit.      TTypeEncoding = dwarf::DW_EH_PE_omit; -    // dwarf::DW_EH_PE_absptr -    TypeFormatSize = Asm->getDataLayout().getPointerSize();    } else {      // Okay, we have actual filters or typeinfos to emit.  As such, we need to      // pick a type encoding for them.  We're about to emit a list of pointers to @@ -439,7 +415,6 @@ void EHStreamer::emitExceptionTable() {      // in target-independent code.      //      TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding(); -    TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);    }    // Begin the exception table. @@ -460,64 +435,35 @@ void EHStreamer::emitExceptionTable() {    Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");    Asm->EmitEncodingByte(TTypeEncoding, "@TType"); -  // The type infos need to be aligned. GCC does this by inserting padding just -  // before the type infos. 
However, this changes the size of the exception -  // table, so you need to take this into account when you output the exception -  // table size. However, the size is output using a variable length encoding. -  // So by increasing the size by inserting padding, you may increase the number -  // of bytes used for writing the size. If it increases, say by one byte, then -  // you now need to output one less byte of padding to get the type infos -  // aligned. However this decreases the size of the exception table. This -  // changes the value you have to output for the exception table size. Due to -  // the variable length encoding, the number of bytes used for writing the -  // length may decrease. If so, you then have to increase the amount of -  // padding. And so on. If you look carefully at the GCC code you will see that -  // it indeed does this in a loop, going on and on until the values stabilize. -  // We chose another solution: don't output padding inside the table like GCC -  // does, instead output it before the table. -  unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; -  unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength); -  unsigned TTypeBaseOffset = -    sizeof(int8_t) +                            // Call site format -    CallSiteTableLengthSize +                   // Call site table length size -    CallSiteTableLength +                       // Call site table length -    SizeActions +                               // Actions size -    SizeTypes; -  unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset); -  unsigned TotalSize = -    sizeof(int8_t) +                            // LPStart format -    sizeof(int8_t) +                            // TType format -    (HaveTTData ? TTypeBaseOffsetSize : 0) +    // TType base offset size -    TTypeBaseOffset;                            // TType base offset -  unsigned PadBytes = (4 - TotalSize) & 3; - +  MCSymbol *TTBaseLabel = nullptr;    if (HaveTTData) { -    // Account for any extra padding that will be added to the call site table -    // length. -    Asm->EmitPaddedULEB128(TTypeBaseOffset, TTypeBaseOffsetSize + PadBytes, -                           "@TType base offset"); -    PadBytes = 0; +    // N.B.: There is a dependency loop between the size of the TTBase uleb128 +    // here and the amount of padding before the aligned type table. The +    // assembler must sometimes pad this uleb128 or insert extra padding before +    // the type table. See PR35809 or GNU as bug 4029. +    MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref"); +    TTBaseLabel = Asm->createTempSymbol("ttbase"); +    Asm->EmitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel); +    Asm->OutStreamer->EmitLabel(TTBaseRefLabel);    }    bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); +  // Emit the landing pad call site table. +  MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin"); +  MCSymbol *CstEndLabel = Asm->createTempSymbol("cst_end"); +  Asm->EmitEncodingByte(CallSiteEncoding, "Call site"); +  Asm->EmitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel); +  Asm->OutStreamer->EmitLabel(CstBeginLabel); +    // SjLj Exception handling    if (IsSJLJ) { -    Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); - -    // Add extra padding if it wasn't added to the TType base offset. 
-    Asm->EmitPaddedULEB128(CallSiteTableLength, -                           CallSiteTableLengthSize + PadBytes, -                           "Call site table length"); - -    // Emit the landing pad site information.      unsigned idx = 0;      for (SmallVectorImpl<CallSiteEntry>::const_iterator           I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {        const CallSiteEntry &S = *I; -      // Offset of the landing pad, counted in 16-byte bundles relative to the -      // @LPStart address. +      // Index of the call site entry.        if (VerboseAsm) {          Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<");          Asm->OutStreamer->AddComment("  On exception at call site "+Twine(idx)); @@ -557,14 +503,6 @@ void EHStreamer::emitExceptionTable() {      // A missing entry in the call-site table indicates that a call is not      // supposed to throw. -    // Emit the landing pad call site table. -    Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); - -    // Add extra padding if it wasn't added to the TType base offset. -    Asm->EmitPaddedULEB128(CallSiteTableLength, -                           CallSiteTableLengthSize + PadBytes, -                           "Call site table length"); -      unsigned Entry = 0;      for (SmallVectorImpl<CallSiteEntry>::const_iterator           I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { @@ -579,29 +517,27 @@ void EHStreamer::emitExceptionTable() {        if (!EndLabel)          EndLabel = Asm->getFunctionEnd(); -      // Offset of the call site relative to the previous call site, counted in -      // number of 16-byte bundles. The first call site is counted relative to -      // the start of the procedure fragment. +      // Offset of the call site relative to the start of the procedure.        if (VerboseAsm)          Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<"); -      Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); +      Asm->EmitLabelDifferenceAsULEB128(BeginLabel, EHFuncBeginSym);        if (VerboseAsm)          Asm->OutStreamer->AddComment(Twine("  Call between ") +                                       BeginLabel->getName() + " and " +                                       EndLabel->getName()); -      Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); +      Asm->EmitLabelDifferenceAsULEB128(EndLabel, BeginLabel); -      // Offset of the landing pad, counted in 16-byte bundles relative to the -      // @LPStart address. +      // Offset of the landing pad relative to the start of the procedure.        if (!S.LPad) {          if (VerboseAsm)            Asm->OutStreamer->AddComment("    has no landing pad"); -        Asm->OutStreamer->EmitIntValue(0, 4/*size*/); +        Asm->EmitULEB128(0);        } else {          if (VerboseAsm)            Asm->OutStreamer->AddComment(Twine("    jumps to ") +                                         S.LPad->LandingPadLabel->getName()); -        Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4); +        Asm->EmitLabelDifferenceAsULEB128(S.LPad->LandingPadLabel, +                                          EHFuncBeginSym);        }        // Offset of the first associated action record, relative to the start of @@ -617,6 +553,7 @@ void EHStreamer::emitExceptionTable() {        Asm->EmitULEB128(S.Action);      }    } +  Asm->OutStreamer->EmitLabel(CstEndLabel);    // Emit the Action Table.    
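An aside on the call-site table rewrite above: the deleted size arithmetic existed because the table length and the TType base offset are ULEB128-encoded, so the number of bytes they occupy depends on the value being encoded, and inserting alignment padding changes that value. Emitting label differences (cst_begin/cst_end, ttbaseref/ttbase) hands that fixed-point problem to the assembler instead. A minimal encoder makes the variable width obvious (standard LEB128 algorithm, not LLVM's EmitULEB128):

  #include <cstdint>
  #include <vector>

  // Unsigned LEB128: seven payload bits per byte, high bit set while more
  // bytes follow. Values 0..127 take one byte, 128..16383 take two, and so
  // on, so a length near a boundary can grow or shrink as padding is added.
  static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
    do {
      uint8_t Byte = Value & 0x7f;
      Value >>= 7;
      if (Value != 0)
        Byte |= 0x80; // more bytes follow
      Out.push_back(Byte);
    } while (Value != 0);
  }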
int Entry = 0; @@ -660,12 +597,15 @@      Asm->EmitSLEB128(Action.NextAction);    } -  emitTypeInfos(TTypeEncoding); +  if (HaveTTData) { +    Asm->EmitAlignment(2); +    emitTypeInfos(TTypeEncoding, TTBaseLabel); +  } +    Asm->EmitAlignment(2);  } -void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { +void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {    const MachineFunction *MF = Asm->MF;    const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();    const std::vector<unsigned> &FilterIds = MF->getFilterIds(); @@ -687,6 +627,8 @@      Asm->EmitTTypeReference(GV, TTypeEncoding);    } +  Asm->OutStreamer->EmitLabel(TTBaseLabel); +    // Emit the Exception Specifications.    if (VerboseAsm && !FilterIds.empty()) {      Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h index 7962b761d8de..b89421a1e067 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -72,9 +72,9 @@ protected:    /// Compute the actions table and gather the first action index for each    /// landing pad site. -  unsigned computeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs, -                               SmallVectorImpl<ActionEntry> &Actions, -                               SmallVectorImpl<unsigned> &FirstActions); +  void computeActionsTable(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, +                           SmallVectorImpl<ActionEntry> &Actions, +                           SmallVectorImpl<unsigned> &FirstActions);    void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,                       RangeMapType &PadMap); @@ -86,7 +86,7 @@ protected:    /// no entry and must not be contained in the try-range of any entry - they    /// form gaps in the table.  Entries must be ordered by try-range address.    void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, -                            const SmallVectorImpl<const LandingPadInfo *> &LPs, +                            const SmallVectorImpl<const LandingPadInfo *> &LandingPads,                              const SmallVectorImpl<unsigned> &FirstActions);    /// Emit landing pads and actions. @@ -110,9 +110,9 @@ protected:    ///     catches in the function.  This table is reverse indexed, base 1.    void emitExceptionTable(); -  virtual void emitTypeInfos(unsigned TTypeEncoding); +  virtual void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel); -  // Helpers for for identifying what kind of clause an EH typeid or selector +  // Helpers for identifying what kind of clause an EH typeid or selector    // corresponds to. Negative selectors are for filter clauses, the zero    // selector is for cleanups, and positive selectors are for catch clauses.
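The predicates that follow test exactly that convention. A self-contained illustration (the descriptive strings are made up for the example, not anything EHStreamer prints):

  #include <cstdio>
  #include <initializer_list>

  // Same convention as the isFilterEHSelector/isCleanupEHSelector/
  // isCatchEHSelector helpers below.
  static const char *classifySelector(int Selector) {
    if (Selector < 0)
      return "filter (exception specification)";
    if (Selector == 0)
      return "cleanup";
    return "catch (1-based typeinfo index)";
  }

  int main() {
    for (int S : {-2, 0, 3})
      std::printf("%d -> %s\n", S, classifySelector(S));
    return 0;
  }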
static bool isFilterEHSelector(int Selector) { return Selector < 0; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index e459c02c9a6e..49cc376fcc98 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -19,7 +19,6 @@  #include "llvm/CodeGen/GCMetadataPrinter.h"  #include "llvm/CodeGen/GCStrategy.h"  #include "llvm/CodeGen/GCs.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/Module.h" @@ -27,6 +26,7 @@  #include "llvm/MC/MCSectionELF.h"  #include "llvm/MC/MCStreamer.h"  #include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  using namespace llvm; @@ -77,7 +77,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,      // Emit PointCount.      OS.AddComment("safe point count"); -    AP.EmitInt16(MD.size()); +    AP.emitInt16(MD.size());      // And each safe point...      for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE; @@ -94,7 +94,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,      // Emit the stack frame size.      OS.AddComment("stack frame size (in words)"); -    AP.EmitInt16(MD.getFrameSize() / IntPtrSize); +    AP.emitInt16(MD.getFrameSize() / IntPtrSize);      // Emit stack arity, i.e. the number of stacked arguments.      unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6; @@ -102,11 +102,11 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,                                ? MD.getFunction().arg_size() - RegisteredArgs                                : 0;      OS.AddComment("stack arity"); -    AP.EmitInt16(StackArity); +    AP.emitInt16(StackArity);      // Emit the number of live roots in the function.      OS.AddComment("live root count"); -    AP.EmitInt16(MD.live_size(PI)); +    AP.emitInt16(MD.live_size(PI));      // And for each live root...      for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI), @@ -114,7 +114,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,           LI != LE; ++LI) {        // Emit live root's offset within the stack frame.        OS.AddComment("stack index (offset / wordsize)"); -      AP.EmitInt16(LI->StackOffset / IntPtrSize); +      AP.emitInt16(LI->StackOffset / IntPtrSize);      }    }  } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index e0cc241dd23f..59a57ed30d10 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -18,7 +18,6 @@  #include "llvm/CodeGen/GCMetadata.h"  #include "llvm/CodeGen/GCMetadataPrinter.h"  #include "llvm/CodeGen/GCs.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/Mangler.h" @@ -27,6 +26,7 @@  #include "llvm/MC/MCDirectives.h"  #include "llvm/MC/MCStreamer.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include <cctype>  #include <cstddef>  #include <cstdint> @@ -129,7 +129,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,      // Very rude!      report_fatal_error(" Too much descriptor for ocaml GC");    } -  AP.EmitInt16(NumDescriptors); +  AP.emitInt16(NumDescriptors);    AP.EmitAlignment(IntPtrSize == 4 ? 
2 : 3);    for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), @@ -166,8 +166,8 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,        }        AP.OutStreamer->EmitSymbolValue(J->Label, IntPtrSize); -      AP.EmitInt16(FrameSize); -      AP.EmitInt16(LiveCount); +      AP.emitInt16(FrameSize); +      AP.emitInt16(LiveCount);        for (GCFunctionInfo::live_iterator K = FI.live_begin(J),                                           KE = FI.live_end(J); @@ -178,7 +178,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,                "GC root stack offset is outside of fixed stack frame and out "                "of range for ocaml GC!");          } -        AP.EmitInt16(K->StackOffset); +        AP.emitInt16(K->StackOffset);        }        AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp new file mode 100644 index 000000000000..18d37caf57ee --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -0,0 +1,45 @@ +//===-- CodeGen/AsmPrinter/WinCFGuard.cpp - Control Flow Guard Impl ------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing the Windows Control Flow Guard function ID table into asm files. +// +//===----------------------------------------------------------------------===// + +#include "WinCFGuard.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCStreamer.h" + +#include <vector> + +using namespace llvm; + +WinCFGuard::WinCFGuard(AsmPrinter *A) : AsmPrinterHandler(), Asm(A) {} + +WinCFGuard::~WinCFGuard() {} + +void WinCFGuard::endModule() { +  const Module *M = Asm->MMI->getModule(); +  std::vector<const Function *> Functions; +  for (const Function &F : *M) +    if (F.hasAddressTaken()) +      Functions.push_back(&F); +  if (Functions.empty()) +    return; +  auto &OS = *Asm->OutStreamer; +  OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); +  for (const Function *F : Functions) +    OS.EmitCOFFSymbolIndex(Asm->getSymbol(F)); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h new file mode 100644 index 000000000000..124e8f04bfad --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h @@ -0,0 +1,54 @@ +//===-- WinCFGuard.h - Windows Control Flow Guard Handling ----*- C++ -*--===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing the Windows Control Flow Guard function ID table into asm files.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H + +#include "AsmPrinterHandler.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler { +  /// Target of directive emission. +  AsmPrinter *Asm; + +public: +  WinCFGuard(AsmPrinter *A); +  ~WinCFGuard() override; + +  void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} + +  /// Emit the Control Flow Guard function ID table +  void endModule() override; + +  /// Gather pre-function debug information. +  /// Every beginFunction(MF) call should be followed by an endFunction(MF) +  /// call. +  void beginFunction(const MachineFunction *MF) override {} + +  /// Gather post-function debug information. +  /// Please note that some AsmPrinter implementations may not call +  /// beginFunction at all. +  void endFunction(const MachineFunction *MF) override {} + +  /// Process beginning of an instruction. +  void beginInstruction(const MachineInstr *MI) override {} + +  /// Process end of an instruction. +  void endInstruction() override {} +}; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index a6a8e84a949f..eff73a58d8d2 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -21,7 +21,6 @@  #include "llvm/CodeGen/MachineModuleInfo.h"  #include "llvm/CodeGen/TargetFrameLowering.h"  #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/CodeGen/WinEHFuncInfo.h"  #include "llvm/IR/DataLayout.h" @@ -35,6 +34,7 @@  #include "llvm/MC/MCSymbol.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include "llvm/Target/TargetOptions.h"  using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h index 371061c2c2ec..eed3c4453ffc 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h @@ -100,7 +100,7 @@ public:    /// Gather and emit post-function exception information.    void endFunction(const MachineFunction *) override; -  /// \brief Emit target-specific EH funclet machinery. +  /// Emit target-specific EH funclet machinery.    
void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) override;    void endFunclet() override;  }; diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp index 7042bc997223..f2615edaece2 100644 --- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -98,8 +98,8 @@ namespace {          CreateCmpXchgInstFun CreateCmpXchg);      bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); -    bool isIdempotentRMW(AtomicRMWInst *AI); -    bool simplifyIdempotentRMW(AtomicRMWInst *AI); +    bool isIdempotentRMW(AtomicRMWInst *RMWI); +    bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);      bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,                                   Value *PointerOperand, Value *ValueOperand, @@ -379,8 +379,8 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {    NewLI->setAlignment(LI->getAlignment());    NewLI->setVolatile(LI->isVolatile());    NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); -  DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); -   +  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); +    Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());    LI->replaceAllUsesWith(NewVal);    LI->eraseFromParent(); @@ -462,7 +462,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {    NewSI->setAlignment(SI->getAlignment());    NewSI->setVolatile(SI->isVolatile());    NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); -  DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); +  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");    SI->eraseFromParent();    return NewSI;  } @@ -943,7 +943,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *                                              CI->getSyncScopeID());    NewCI->setVolatile(CI->isVolatile());    NewCI->setWeak(CI->isWeak()); -  DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); +  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");    Value *OldVal = Builder.CreateExtractValue(NewCI, 0);    Value *Succ = Builder.CreateExtractValue(NewCI, 1); diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index 7f358a679366..c7a0c6457164 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -152,7 +152,7 @@ BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,  void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {    assert(MBB->pred_empty() && "MBB must be dead!"); -  DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); +  LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);    MachineFunction *MF = MBB->getParent();    // drop all successors. @@ -164,7 +164,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {    // Remove the block.    MF->erase(MBB); -  FuncletMembership.erase(MBB); +  EHScopeMembership.erase(MBB);    if (MLI)      MLI->removeBlock(MBB);  } @@ -199,8 +199,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,        MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());    } -  // Recalculate funclet membership. -  FuncletMembership = getFuncletMembership(MF); +  // Recalculate EH scope membership. 
+  EHScopeMembership = getEHScopeMembership(MF);    bool MadeChangeThisIteration = true;    while (MadeChangeThisIteration) { @@ -296,6 +296,11 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {    return HashMachineInstr(*I);  } +///  Whether MI should be counted as an instruction when calculating common tail. +static bool countsAsInstruction(const MachineInstr &MI) { +  return !(MI.isDebugValue() || MI.isCFIInstruction()); +} +  /// ComputeCommonTailLength - Given two machine basic blocks, compute the number  /// of instructions they actually have in common together at their end.  Return  /// iterators for the first shared instruction in each block. @@ -310,26 +315,27 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,    while (I1 != MBB1->begin() && I2 != MBB2->begin()) {      --I1; --I2;      // Skip debugging pseudos; necessary to avoid changing the code. -    while (I1->isDebugValue()) { +    while (!countsAsInstruction(*I1)) {        if (I1==MBB1->begin()) { -        while (I2->isDebugValue()) { -          if (I2==MBB2->begin()) +        while (!countsAsInstruction(*I2)) { +          if (I2==MBB2->begin()) {              // I1==DBG at begin; I2==DBG at begin -            return TailLen; +            goto SkipTopCFIAndReturn; +          }            --I2;          }          ++I2;          // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin -        return TailLen; +        goto SkipTopCFIAndReturn;        }        --I1;      }      // I1==first (untested) non-DBG preceding known match -    while (I2->isDebugValue()) { +    while (!countsAsInstruction(*I2)) {        if (I2==MBB2->begin()) {          ++I1;          // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin -        return TailLen; +        goto SkipTopCFIAndReturn;        }        --I2;      } @@ -352,7 +358,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,    // I1==MBB1->begin() work as expected.)    if (I1 == MBB1->begin() && I2 != MBB2->begin()) {      --I2; -    while (I2->isDebugValue()) { +    while (I2->isDebugInstr()) {        if (I2 == MBB2->begin())          return TailLen;        --I2; @@ -361,13 +367,44 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,    }    if (I2 == MBB2->begin() && I1 != MBB1->begin()) {      --I1; -    while (I1->isDebugValue()) { +    while (I1->isDebugInstr()) {        if (I1 == MBB1->begin())          return TailLen;        --I1;      }      ++I1;    } + +SkipTopCFIAndReturn: +  // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if +  // I1 and I2 are non-identical when compared and then one or both of them ends +  // up pointing to a CFI instruction after being incremented. For example: +  /* +    BB1: +    ... +    INSTRUCTION_A +    ADD32ri8  <- last common instruction +    ... +    BB2: +    ... +    INSTRUCTION_B +    CFI_INSTRUCTION +    ADD32ri8  <- last common instruction +    ... +  */ +  // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after +  // incrementing the iterators, I1 will point to ADD, however I2 will point to +  // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the +  // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI +  // instruction. 
+  while (I1 != MBB1->end() && I1->isCFIInstruction()) { +    ++I1; +  } + +  while (I2 != MBB2->end() && I2->isCFIInstruction()) { +    ++I2; +  } +    return TailLen;  } @@ -438,11 +475,11 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,    if (UpdateLiveIns)      computeAndAddLiveIns(LiveRegs, *NewMBB); -  // Add the new block to the funclet. -  const auto &FuncletI = FuncletMembership.find(&CurMBB); -  if (FuncletI != FuncletMembership.end()) { -    auto n = FuncletI->second; -    FuncletMembership[NewMBB] = n; +  // Add the new block to the EH scope. +  const auto &EHScopeI = EHScopeMembership.find(&CurMBB); +  if (EHScopeI != EHScopeMembership.end()) { +    auto n = EHScopeI->second; +    EHScopeMembership[NewMBB] = n;    }    return NewMBB; @@ -454,7 +491,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,                                  MachineBasicBlock::iterator E) {    unsigned Time = 0;    for (; I != E; ++I) { -    if (I->isDebugValue()) +    if (!countsAsInstruction(*I))        continue;      if (I->isCall())        Time += 10; @@ -589,7 +626,7 @@ static bool blockEndsInUnreachable(const MachineBasicBlock *MBB) {  /// SuccBB          A common successor of MBB1, MBB2 which are in a canonical form  ///                 relative to SuccBB  /// PredBB          The layout predecessor of SuccBB, if any. -/// FuncletMembership  map from block to funclet #. +/// EHScopeMembership  map from block to EH scope #.  /// AfterPlacement  True if we are merging blocks after layout. Stricter  ///                 thresholds apply to prevent undoing tail-duplication.  static bool @@ -598,24 +635,24 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,                    MachineBasicBlock::iterator &I1,                    MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,                    MachineBasicBlock *PredBB, -                  DenseMap<const MachineBasicBlock *, int> &FuncletMembership, +                  DenseMap<const MachineBasicBlock *, int> &EHScopeMembership,                    bool AfterPlacement) { -  // It is never profitable to tail-merge blocks from two different funclets. -  if (!FuncletMembership.empty()) { -    auto Funclet1 = FuncletMembership.find(MBB1); -    assert(Funclet1 != FuncletMembership.end()); -    auto Funclet2 = FuncletMembership.find(MBB2); -    assert(Funclet2 != FuncletMembership.end()); -    if (Funclet1->second != Funclet2->second) +  // It is never profitable to tail-merge blocks from two different EH scopes. +  if (!EHScopeMembership.empty()) { +    auto EHScope1 = EHScopeMembership.find(MBB1); +    assert(EHScope1 != EHScopeMembership.end()); +    auto EHScope2 = EHScopeMembership.find(MBB2); +    assert(EHScope2 != EHScopeMembership.end()); +    if (EHScope1->second != EHScope2->second)        return false;    }    CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);    if (CommonTailLen == 0)      return false; -  DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1) -               << " and " << printMBBReference(*MBB2) << " is " << CommonTailLen -               << '\n'); +  LLVM_DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1) +                    << " and " << printMBBReference(*MBB2) << " is " +                    << CommonTailLen << '\n');    // It's almost always profitable to merge any number of non-terminator    // instructions with the block that falls through into the common successor. 
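countsAsInstruction() above widens the old isDebugValue() tests so that CFI directives are also transparent while two tails are compared. A toy model of the suffix computation over plain opcode strings (hypothetical names, not MachineInstrs), reusing the BB1/BB2 example from the comment in ComputeCommonTailLength():

  #include <cstdio>
  #include <string>
  #include <vector>

  // Stand-in for MI.isDebugValue() / MI.isCFIInstruction().
  static bool countsAsInstruction(const std::string &Op) {
    return Op != "DBG_VALUE" && Op != "CFI_INSTRUCTION";
  }

  // Length of the shared tail of two blocks, skipping meta-instructions,
  // in the spirit of ComputeCommonTailLength().
  static unsigned commonTailLength(const std::vector<std::string> &A,
                                   const std::vector<std::string> &B) {
    auto I = A.rbegin(), J = B.rbegin();
    unsigned Len = 0;
    while (true) {
      while (I != A.rend() && !countsAsInstruction(*I)) ++I;
      while (J != B.rend() && !countsAsInstruction(*J)) ++J;
      if (I == A.rend() || J == B.rend() || *I != *J)
        return Len;
      ++Len; ++I; ++J;
    }
  }

  int main() {
    std::vector<std::string> BB1 = {"INSTRUCTION_A", "ADD32ri8"};
    std::vector<std::string> BB2 = {"INSTRUCTION_B", "CFI_INSTRUCTION",
                                    "ADD32ri8"};
    std::printf("common tail length = %u\n", commonTailLength(BB1, BB2));
    return 0;
  }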
@@ -706,7 +743,7 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,                              MinCommonTailLength,                              CommonTailLen, TrialBBI1, TrialBBI2,                              SuccBB, PredBB, -                            FuncletMembership, +                            EHScopeMembership,                              AfterBlockPlacement)) {          if (CommonTailLen > maxCommonTailLength) {            SameTails.clear(); @@ -770,8 +807,8 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,      SameTails[commonTailIndex].getTailStartPos();    MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); -  DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size " -               << maxCommonTailLength); +  LLVM_DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size " +                    << maxCommonTailLength);    // If the split block unconditionally falls-thru to SuccBB, it will be    // merged. In control flow terms it should then take SuccBB's name. e.g. If @@ -780,7 +817,7 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,      SuccBB->getBasicBlock() : MBB->getBasicBlock();    MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB);    if (!newMBB) { -    DEBUG(dbgs() << "... failed!"); +    LLVM_DEBUG(dbgs() << "... failed!");      return false;    } @@ -814,12 +851,12 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,      assert(MBBI != MBBIE && "Reached BB end within common tail length!");      (void)MBBIE; -    if (MBBI->isDebugValue()) { +    if (!countsAsInstruction(*MBBI)) {        ++MBBI;        continue;      } -    while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue()) +    while ((MBBICommon != MBBIECommon) && !countsAsInstruction(*MBBICommon))        ++MBBICommon;      assert(MBBICommon != MBBIECommon && @@ -859,7 +896,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {    }    for (auto &MI : *MBB) { -    if (MI.isDebugValue()) +    if (!countsAsInstruction(MI))        continue;      DebugLoc DL = MI.getDebugLoc();      for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) { @@ -869,7 +906,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {        auto &Pos = NextCommonInsts[i];        assert(Pos != SameTails[i].getBlock()->end() &&            "Reached BB end within common tail"); -      while (Pos->isDebugValue()) { +      while (!countsAsInstruction(*Pos)) {          ++Pos;          assert(Pos != SameTails[i].getBlock()->end() &&              "Reached BB end within common tail"); @@ -884,11 +921,12 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {    if (UpdateLiveIns) {      LivePhysRegs NewLiveIns(*TRI);      computeLiveIns(NewLiveIns, *MBB); +    LiveRegs.init(*TRI);      // The flag merging may lead to some register uses no longer using the      // <undef> flag, add IMPLICIT_DEFs in the predecessors as necessary.      
for (MachineBasicBlock *Pred : MBB->predecessors()) { -      LiveRegs.init(*TRI); +      LiveRegs.clear();        LiveRegs.addLiveOuts(*Pred);        MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator();        for (unsigned Reg : NewLiveIns) { @@ -919,18 +957,19 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,                                        unsigned MinCommonTailLength) {    bool MadeChange = false; -  DEBUG(dbgs() << "\nTryTailMergeBlocks: "; -        for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs() -        << printMBBReference(*MergePotentials[i].getBlock()) -        << (i == e - 1 ? "" : ", "); -        dbgs() << "\n"; if (SuccBB) { -          dbgs() << "  with successor " << printMBBReference(*SuccBB) << '\n'; -          if (PredBB) -            dbgs() << "  which has fall-through from " -                   << printMBBReference(*PredBB) << "\n"; -        } dbgs() << "Looking for common tails of at least " -                 << MinCommonTailLength << " instruction" -                 << (MinCommonTailLength == 1 ? "" : "s") << '\n';); +  LLVM_DEBUG( +      dbgs() << "\nTryTailMergeBlocks: "; +      for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs() +      << printMBBReference(*MergePotentials[i].getBlock()) +      << (i == e - 1 ? "" : ", "); +      dbgs() << "\n"; if (SuccBB) { +        dbgs() << "  with successor " << printMBBReference(*SuccBB) << '\n'; +        if (PredBB) +          dbgs() << "  which has fall-through from " +                 << printMBBReference(*PredBB) << "\n"; +      } dbgs() << "Looking for common tails of at least " +               << MinCommonTailLength << " instruction" +               << (MinCommonTailLength == 1 ? "" : "s") << '\n';);    // Sort by hash value so that blocks with identical end sequences sort    // together. @@ -1010,19 +1049,19 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,      // MBB is common tail.  Adjust all other BB's to jump to this one.      // Traversal must be forwards so erases work. -    DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB) -                 << " for "); +    LLVM_DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB) +                      << " for ");      for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {        if (commonTailIndex == i)          continue; -      DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock()) -                   << (i == e - 1 ? "" : ", ")); +      LLVM_DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock()) +                        << (i == e - 1 ? "" : ", "));        // Hack the end off BB i, making it jump to BB commonTailIndex instead.        replaceTailWithBranchTo(SameTails[i].getTailStartPos(), *MBB);        // BB i is no longer a predecessor of SuccBB; remove it from the worklist.        MergePotentials.erase(SameTails[i].getMPIter());      } -    DEBUG(dbgs() << "\n"); +    LLVM_DEBUG(dbgs() << "\n");      // We leave commonTailIndex in the worklist in case there are other blocks      // that match it with a smaller number of instructions.      MadeChange = true; @@ -1254,8 +1293,8 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {    // Make sure blocks are numbered in order    MF.RenumberBlocks(); -  // Renumbering blocks alters funclet membership, recalculate it. -  FuncletMembership = getFuncletMembership(MF); +  // Renumbering blocks alters EH scope membership, recalculate it. 
+  EHScopeMembership = getEHScopeMembership(MF);    for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();         I != E; ) { @@ -1319,6 +1358,53 @@ static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {    return DebugLoc();  } +static void copyDebugInfoToPredecessor(const TargetInstrInfo *TII, +                                       MachineBasicBlock &MBB, +                                       MachineBasicBlock &PredMBB) { +  auto InsertBefore = PredMBB.getFirstTerminator(); +  for (MachineInstr &MI : MBB.instrs()) +    if (MI.isDebugValue()) { +      TII->duplicate(PredMBB, InsertBefore, MI); +      LLVM_DEBUG(dbgs() << "Copied debug value from empty block to pred: " +                        << MI); +    } +} + +static void copyDebugInfoToSuccessor(const TargetInstrInfo *TII, +                                     MachineBasicBlock &MBB, +                                     MachineBasicBlock &SuccMBB) { +  auto InsertBefore = SuccMBB.SkipPHIsAndLabels(SuccMBB.begin()); +  for (MachineInstr &MI : MBB.instrs()) +    if (MI.isDebugValue()) { +      TII->duplicate(SuccMBB, InsertBefore, MI); +      LLVM_DEBUG(dbgs() << "Copied debug value from empty block to succ: " +                        << MI); +    } +} + +// Try to salvage DBG_VALUE instructions from an otherwise empty block. If such +// a basic block is removed we would lose the debug information unless we have +// copied the information to a predecessor/successor. +// +// TODO: This function only handles some simple cases. An alternative would be +// to run a heavier analysis, such as the LiveDebugValues pass, before we do +// branch folding. +static void salvageDebugInfoFromEmptyBlock(const TargetInstrInfo *TII, +                                           MachineBasicBlock &MBB) { +  assert(IsEmptyBlock(&MBB) && "Expected an empty block (except debug info)."); +  // If this MBB is the only predecessor of a successor it is legal to copy +  // DBG_VALUE instructions to the beginning of the successor. +  for (MachineBasicBlock *SuccBB : MBB.successors()) +    if (SuccBB->pred_size() == 1) +      copyDebugInfoToSuccessor(TII, MBB, *SuccBB); +  // If this MBB is the only successor of a predecessor it is legal to copy the +  // DBG_VALUE instructions to the end of the predecessor (just before the +  // terminators, assuming that the terminator isn't affecting the DBG_VALUE). +  for (MachineBasicBlock *PredBB : MBB.predecessors()) +    if (PredBB->succ_size() == 1) +      copyDebugInfoToPredecessor(TII, MBB, *PredBB); +} +  bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {    bool MadeChange = false;    MachineFunction &MF = *MBB->getParent(); @@ -1327,14 +1413,14 @@ ReoptimizeBlock:    MachineFunction::iterator FallThrough = MBB->getIterator();    ++FallThrough; -  // Make sure MBB and FallThrough belong to the same funclet. -  bool SameFunclet = true; -  if (!FuncletMembership.empty() && FallThrough != MF.end()) { -    auto MBBFunclet = FuncletMembership.find(MBB); -    assert(MBBFunclet != FuncletMembership.end()); -    auto FallThroughFunclet = FuncletMembership.find(&*FallThrough); -    assert(FallThroughFunclet != FuncletMembership.end()); -    SameFunclet = MBBFunclet->second == FallThroughFunclet->second; +  // Make sure MBB and FallThrough belong to the same EH scope. 
+  bool SameEHScope = true; +  if (!EHScopeMembership.empty() && FallThrough != MF.end()) { +    auto MBBEHScope = EHScopeMembership.find(MBB); +    assert(MBBEHScope != EHScopeMembership.end()); +    auto FallThroughEHScope = EHScopeMembership.find(&*FallThrough); +    assert(FallThroughEHScope != EHScopeMembership.end()); +    SameEHScope = MBBEHScope->second == FallThroughEHScope->second;    }    // If this block is empty, make everyone use its fall-through, not the block @@ -1342,7 +1428,8 @@ ReoptimizeBlock:    // points to this block.  Blocks with their addresses taken shouldn't be    // optimized away.    if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() && -      SameFunclet) { +      SameEHScope) { +    salvageDebugInfoFromEmptyBlock(TII, *MBB);      // Dead block?  Leave for cleanup later.      if (MBB->pred_empty()) return MadeChange; @@ -1406,8 +1493,8 @@ ReoptimizeBlock:      if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&          PrevBB.succ_size() == 1 &&          !MBB->hasAddressTaken() && !MBB->isEHPad()) { -      DEBUG(dbgs() << "\nMerging into block: " << PrevBB -                   << "From MBB: " << *MBB); +      LLVM_DEBUG(dbgs() << "\nMerging into block: " << PrevBB +                        << "From MBB: " << *MBB);        // Remove redundant DBG_VALUEs first.        if (PrevBB.begin() != PrevBB.end()) {          MachineBasicBlock::iterator PrevBBIter = PrevBB.end(); @@ -1416,7 +1503,7 @@ ReoptimizeBlock:          // Check if DBG_VALUE at the end of PrevBB is identical to the          // DBG_VALUE at the beginning of MBB.          while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end() -               && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) { +               && PrevBBIter->isDebugInstr() && MBBIter->isDebugInstr()) {            if (!MBBIter->isIdenticalTo(*PrevBBIter))              break;            MachineInstr &DuplicateDbg = *MBBIter; @@ -1493,8 +1580,8 @@ ReoptimizeBlock:          // Reverse the branch so we will fall through on the previous true cond.          SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);          if (!TII->reverseBranchCondition(NewPriorCond)) { -          DEBUG(dbgs() << "\nMoving MBB: " << *MBB -                       << "To make fallthrough to: " << *PriorTBB << "\n"); +          LLVM_DEBUG(dbgs() << "\nMoving MBB: " << *MBB +                            << "To make fallthrough to: " << *PriorTBB << "\n");            DebugLoc dl = getBranchDebugLoc(PrevBB);            TII->removeBranch(PrevBB); @@ -1829,8 +1916,12 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,    if (Uses.empty())      return Loc; +  // If the terminator is the only instruction in the block and Uses is not +  // empty (or we would have returned above), we can still safely hoist +  // instructions just before the terminator as long as the Defs/Uses are not +  // violated (which is checked in HoistCommonCodeInSuccs).    if (Loc == MBB->begin()) -    return MBB->end(); +    return Loc;    // The terminator is probably a conditional branch, try not to separate the    // branch from condition setting instruction. 
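Regarding the HoistCommonCodeInSuccs() hunks that follow: the pass moves the identical leading instructions of the two successors up into their common predecessor, and after this change it recomputes the successors' live-in lists wholesale instead of maintaining LocalDefs/LocalKills by hand. A toy version of the splice over opcode strings (it deliberately omits the register dependence and terminator checks the real code performs):

  #include <string>
  #include <vector>

  // Hoist the identical leading instructions of TBB and FBB into Pred.
  // The real pass inserts before Pred's terminator and only hoists what is
  // safe with respect to defs and uses; this sketch shows the splice alone.
  static unsigned hoistCommonPrefix(std::vector<std::string> &Pred,
                                    std::vector<std::string> &TBB,
                                    std::vector<std::string> &FBB) {
    unsigned N = 0;
    while (N < TBB.size() && N < FBB.size() && TBB[N] == FBB[N])
      ++N;
    Pred.insert(Pred.end(), TBB.begin(), TBB.begin() + N);
    TBB.erase(TBB.begin(), TBB.begin() + N);
    FBB.erase(FBB.begin(), FBB.begin() + N);
    return N; // number of hoisted instructions
  }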
@@ -1917,7 +2008,6 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {      return false;    bool HasDups = false; -  SmallVector<unsigned, 4> LocalDefs, LocalKills;    SmallSet<unsigned, 4> ActiveDefsSet, AllDefsSet;    MachineBasicBlock::iterator TIB = TBB->begin();    MachineBasicBlock::iterator FIB = FBB->begin(); @@ -2000,7 +2090,6 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {        if (!Reg)          continue;        if (!AllDefsSet.count(Reg)) { -        LocalKills.push_back(Reg);          continue;        }        if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -2018,7 +2107,6 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {        unsigned Reg = MO.getReg();        if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg))          continue; -      LocalDefs.push_back(Reg);        addRegAndItsAliases(Reg, TRI, ActiveDefsSet);        addRegAndItsAliases(Reg, TRI, AllDefsSet);      } @@ -2034,25 +2122,9 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {    MBB->splice(Loc, TBB, TBB->begin(), TIB);    FBB->erase(FBB->begin(), FIB); -  // Update livein's. -  bool ChangedLiveIns = false; -  for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { -    unsigned Def = LocalDefs[i]; -    if (ActiveDefsSet.count(Def)) { -      TBB->addLiveIn(Def); -      FBB->addLiveIn(Def); -      ChangedLiveIns = true; -    } -  } -  for (unsigned K : LocalKills) { -    TBB->removeLiveIn(K); -    FBB->removeLiveIn(K); -    ChangedLiveIns = true; -  } - -  if (ChangedLiveIns) { -    TBB->sortUniqueLiveIns(); -    FBB->sortUniqueLiveIns(); +  if (UpdateLiveIns) { +    recomputeLiveIns(*TBB); +    recomputeLiveIns(*FBB);    }    ++NumHoist; diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h index 0f0952550137..accd0ab7317b 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -38,11 +38,11 @@ class TargetRegisterInfo;      explicit BranchFolder(bool defaultEnableTailMerge,                            bool CommonHoist, -                          MBFIWrapper &MBFI, -                          const MachineBranchProbabilityInfo &MBPI, +                          MBFIWrapper &FreqInfo, +                          const MachineBranchProbabilityInfo &ProbInfo,                            // Min tail length to merge. Defaults to commandline                            // flag. Ignored for optsize. -                          unsigned MinCommonTailLength = 0); +                          unsigned MinTailLength = 0);      /// Perform branch folding, tail merging and other CFG optimizations on the      /// given function.  Block placement changes the layout and may create new @@ -75,7 +75,7 @@ class TargetRegisterInfo;      std::vector<MergePotentialsElt> MergePotentials;      SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging; -    DenseMap<const MachineBasicBlock *, int> FuncletMembership; +    DenseMap<const MachineBasicBlock *, int> EHScopeMembership;      class SameTailElt {        MPIterator MPIter; @@ -132,7 +132,7 @@ class TargetRegisterInfo;      LivePhysRegs LiveRegs;    public: -    /// \brief This class keeps track of branch frequencies of newly created +    /// This class keeps track of branch frequencies of newly created      /// blocks and tail-merged blocks.
class MBFIWrapper {      public: diff --git a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp index 0d87f142c7cc..c092da2b6602 100644 --- a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -18,6 +18,7 @@  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/DebugLoc.h"  #include "llvm/Pass.h"  #include "llvm/Support/Compiler.h" @@ -95,7 +96,7 @@ class BranchRelaxation : public MachineFunctionPass {    MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI,                                             MachineBasicBlock *DestBB); -  void adjustBlockOffsets(MachineBasicBlock &MBB); +  void adjustBlockOffsets(MachineBasicBlock &Start);    bool isBlockInRange(const MachineInstr &MI, const MachineBasicBlock &BB) const;    bool fixupConditionalBranch(MachineInstr &MI); @@ -287,10 +288,11 @@ bool BranchRelaxation::isBlockInRange(    if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset))      return true; -  DEBUG(dbgs() << "Out of range branch to destination " -               << printMBBReference(DestBB) << " from " -               << printMBBReference(*MI.getParent()) << " to " << DestOffset -               << " offset " << DestOffset - BrOffset << '\t' << MI); +  LLVM_DEBUG(dbgs() << "Out of range branch to destination " +                    << printMBBReference(DestBB) << " from " +                    << printMBBReference(*MI.getParent()) << " to " +                    << DestOffset << " offset " << DestOffset - BrOffset << '\t' +                    << MI);    return false;  } @@ -302,8 +304,41 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {    DebugLoc DL = MI.getDebugLoc();    MachineBasicBlock *MBB = MI.getParent();    MachineBasicBlock *TBB = nullptr, *FBB = nullptr; +  MachineBasicBlock *NewBB = nullptr;    SmallVector<MachineOperand, 4> Cond; +  auto insertUncondBranch = [&](MachineBasicBlock *MBB, +                                MachineBasicBlock *DestBB) { +    unsigned &BBSize = BlockInfo[MBB->getNumber()].Size; +    int NewBrSize = 0; +    TII->insertUnconditionalBranch(*MBB, DestBB, DL, &NewBrSize); +    BBSize += NewBrSize; +  }; +  auto insertBranch = [&](MachineBasicBlock *MBB, MachineBasicBlock *TBB, +                          MachineBasicBlock *FBB, +                          SmallVectorImpl<MachineOperand>& Cond) { +    unsigned &BBSize = BlockInfo[MBB->getNumber()].Size; +    int NewBrSize = 0; +    TII->insertBranch(*MBB, TBB, FBB, Cond, DL, &NewBrSize); +    BBSize += NewBrSize; +  }; +  auto removeBranch = [&](MachineBasicBlock *MBB) { +    unsigned &BBSize = BlockInfo[MBB->getNumber()].Size; +    int RemovedSize = 0; +    TII->removeBranch(*MBB, &RemovedSize); +    BBSize -= RemovedSize; +  }; + +  auto finalizeBlockChanges = [&](MachineBasicBlock *MBB, +                                  MachineBasicBlock *NewBB) { +    // Keep the block offsets up to date. +    adjustBlockOffsets(*MBB); + +    // Need to fix live-in lists if we track liveness. 
+    if (NewBB && TRI->trackLivenessAfterRegAlloc(*MF))
+      computeAndAddLiveIns(LiveRegs, *NewBB);
+  };
+
   bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
   assert(!Fail && "branches to be relaxed must be analyzable");
   (void)Fail;
@@ -316,71 +351,90 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
   // b   L1
   // L2:
 
-  if (FBB && isBlockInRange(MI, *FBB)) {
-    // Last MI in the BB is an unconditional branch. We can simply invert the
-    // condition and swap destinations:
-    // beq L1
-    // b   L2
-    // =>
-    // bne L2
-    // b   L1
-    DEBUG(dbgs() << "  Invert condition and swap "
-                    "its destination with " << MBB->back());
-
-    TII->reverseBranchCondition(Cond);
-    int OldSize = 0, NewSize = 0;
-    TII->removeBranch(*MBB, &OldSize);
-    TII->insertBranch(*MBB, FBB, TBB, Cond, DL, &NewSize);
-
-    BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize);
-    return true;
-  } else if (FBB) {
-    // We need to split the basic block here to obtain two long-range
-    // unconditional branches.
-    auto &NewBB = *MF->CreateMachineBasicBlock(MBB->getBasicBlock());
-    MF->insert(++MBB->getIterator(), &NewBB);
-
-    // Insert an entry into BlockInfo to align it properly with the block
-    // numbers.
-    BlockInfo.insert(BlockInfo.begin() + NewBB.getNumber(), BasicBlockInfo());
-
-    unsigned &NewBBSize = BlockInfo[NewBB.getNumber()].Size;
-    int NewBrSize;
-    TII->insertUnconditionalBranch(NewBB, FBB, DL, &NewBrSize);
-    NewBBSize += NewBrSize;
-
-    // Update the successor lists according to the transformation to follow.
-    // Do it here since if there's no split, no update is needed.
-    MBB->replaceSuccessor(FBB, &NewBB);
-    NewBB.addSuccessor(FBB);
+  bool ReversedCond = !TII->reverseBranchCondition(Cond);
+  if (ReversedCond) {
+    if (FBB && isBlockInRange(MI, *FBB)) {
+      // Last MI in the BB is an unconditional branch. We can simply invert the
+      // condition and swap destinations:
+      // beq L1
+      // b   L2
+      // =>
+      // bne L2
+      // b   L1
+      LLVM_DEBUG(dbgs() << "  Invert condition and swap "
+                           "its destination with "
+                        << MBB->back());
+
+      removeBranch(MBB);
+      insertBranch(MBB, FBB, TBB, Cond);
+      finalizeBlockChanges(MBB, nullptr);
+      return true;
+    }
+    if (FBB) {
+      // We need to split the basic block here to obtain two long-range
+      // unconditional branches.
+      NewBB = createNewBlockAfter(*MBB);
+
+      insertUncondBranch(NewBB, FBB);
+      // Update the successor lists according to the transformation to follow.
+      // Do it here since if there's no split, no update is needed.
+      MBB->replaceSuccessor(FBB, NewBB);
+      NewBB->addSuccessor(FBB);
+    }
 
-    // Need to fix live-in lists if we track liveness.
-    if (TRI->trackLivenessAfterRegAlloc(*MF))
-      computeAndAddLiveIns(LiveRegs, NewBB);
+    // We now have an appropriate fall-through block in place (either naturally or
+    // just created), so we can use the inverted condition.
+    MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));
+
+    LLVM_DEBUG(dbgs() << "  Insert B to " << printMBBReference(*TBB)
+                      << ", invert condition and change dest. to "
+                      << printMBBReference(NextBB) << '\n');
+
+    removeBranch(MBB);
+    // Insert a new conditional branch and a new unconditional branch.
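    // For a target whose conditional branches have the shorter reach, the net
    // effect on the natural fall-through case looks like this (illustrative
    // assembly, in the style of the examples above):
    //
    //   beq TBB      ; TBB out of range      bne L2   ; reversed, short hop
    //   L2:                            =>    b   TBB  ; unconditional; can be
    //   ...                                  L2:      ; relaxed again if needed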
+    insertBranch(MBB, &NextBB, TBB, Cond);
+
+    finalizeBlockChanges(MBB, NewBB);
+    return true;
   }
 
+  // Branch cond can't be inverted.
+  // In this case we always add a block after the MBB.
+  LLVM_DEBUG(dbgs() << "  The branch condition can't be inverted. "
+                    << "  Insert a new BB after " << MBB->back());
 
-  // We now have an appropriate fall-through block in place (either naturally or
-  // just created), so we can invert the condition.
-  MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));
+  if (!FBB)
+    FBB = &(*std::next(MachineFunction::iterator(MBB)));
 
-  DEBUG(dbgs() << "  Insert B to " << printMBBReference(*TBB)
-               << ", invert condition and change dest. to "
-               << printMBBReference(NextBB) << '\n');
+  // This is the block with cond. branch and the distance to TBB is too long.
+  //    beq L1
+  // L2:
 
-  unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size;
+  // We do the following transformation:
+  //    beq NewBB
+  //    b L2
+  // NewBB:
+  //    b L1
+  // L2:
 
-  // Insert a new conditional branch and a new unconditional branch.
-  int RemovedSize = 0;
-  TII->reverseBranchCondition(Cond);
-  TII->removeBranch(*MBB, &RemovedSize);
-  MBBSize -= RemovedSize;
+  NewBB = createNewBlockAfter(*MBB);
+  insertUncondBranch(NewBB, TBB);
 
-  int AddedSize = 0;
-  TII->insertBranch(*MBB, &NextBB, TBB, Cond, DL, &AddedSize);
-  MBBSize += AddedSize;
+  LLVM_DEBUG(dbgs() << "  Insert cond B to the new BB "
+                    << printMBBReference(*NewBB)
+                    << "  Keep the existing condition.\n"
+                    << "  Insert B to " << printMBBReference(*FBB) << ".\n"
+                    << "  In the new BB: Insert B to "
+                    << printMBBReference(*TBB) << ".\n");
 
-  // Finally, keep the block offsets up to date.
-  adjustBlockOffsets(*MBB);
+  // Update the successor lists according to the transformation to follow.
+  MBB->replaceSuccessor(TBB, NewBB);
+  NewBB->addSuccessor(TBB);
+
+  // Replace branch in the current (MBB) block.
+  removeBranch(MBB);
+  insertBranch(MBB, NewBB, FBB, Cond);
+
+  finalizeBlockChanges(MBB, NewBB);
   return true;
 }
 
@@ -490,7 +544,7 @@ bool BranchRelaxation::relaxBranchInstructions() {
 
 bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
   MF = &mf;
-  DEBUG(dbgs() << "***** BranchRelaxation *****\n");
+  LLVM_DEBUG(dbgs() << "***** BranchRelaxation *****\n");
 
   const TargetSubtargetInfo &ST = MF->getSubtarget();
   TII = ST.getInstrInfo();
@@ -507,7 +561,7 @@ bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
   // sizes of each block.
   scanFunction();
 
-  DEBUG(dbgs() << "  Basic blocks before relaxation\n"; dumpBBs(););
+  LLVM_DEBUG(dbgs() << "  Basic blocks before relaxation\n"; dumpBBs(););
 
   bool MadeChange = false;
   while (relaxBranchInstructions())
@@ -516,7 +570,7 @@
   // After a while, this might be made debug-only, but it is not expensive.
   verify();
 
-  DEBUG(dbgs() << "  Basic blocks after relaxation\n\n"; dumpBBs());
+  LLVM_DEBUG(dbgs() << "  Basic blocks after relaxation\n\n"; dumpBBs());
 
   BlockInfo.clear();
diff --git a/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp
new file mode 100644
index 000000000000..7f098cb71657
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -0,0 +1,271 @@
+//==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Break False Dependency pass.
+///
+/// Some instructions have false dependencies which cause unnecessary stalls.
+/// For example, instructions that only write part of a register, and implicitly
+/// need to read the other parts of the register.  This may cause unwanted
+/// stalls preventing otherwise unrelated instructions from executing in
+/// parallel in an out-of-order CPU.
+/// This pass is aimed at identifying and avoiding these dependencies when
+/// possible.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/ReachingDefAnalysis.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+
+using namespace llvm;
+
+namespace llvm {
+
+class BreakFalseDeps : public MachineFunctionPass {
+private:
+  MachineFunction *MF;
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  RegisterClassInfo RegClassInfo;
+
+  /// List of undefined register reads in this block in forward order.
+  std::vector<std::pair<MachineInstr *, unsigned>> UndefReads;
+
+  /// Storage for register unit liveness.
+  LivePhysRegs LiveRegSet;
+
+  ReachingDefAnalysis *RDA;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  BreakFalseDeps() : MachineFunctionPass(ID) {
+    initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    AU.addRequired<ReachingDefAnalysis>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+      MachineFunctionProperties::Property::NoVRegs);
+  }
+
+private:
+  /// Process the given basic block.
+  void processBasicBlock(MachineBasicBlock *MBB);
+
+  /// Update def-ages for registers defined by MI.
+  /// Also break dependencies on partial defs and undef uses.
+  void processDefs(MachineInstr *MI);
+
+  /// Helps avoid false dependencies on undef registers by updating the
+  /// machine instructions' undef operand to use a register that the instruction
+  /// is truly dependent on, or use a register with clearance higher than Pref.
+  /// Returns true if it was able to find a true dependency, thus not requiring
+  /// a dependency breaking instruction regardless of clearance.
+  bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
+    unsigned Pref);
+
+  /// Return true if it makes sense to break dependence on a partial
+  /// def or undef use.
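  /// ("Clearance" is the distance, in instructions, from the last write of the
  /// register, as reported by ReachingDefAnalysis::getClearance; breaking the
  /// dependence only pays off while that distance is below the preference Pref.)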
+  bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref);
+
+  /// Break false dependencies on undefined register reads.
+  /// Walk the block backward computing precise liveness. This is expensive, so
+  /// we only do it on demand. Note that the occurrence of undefined register
+  /// reads that should be broken is very rare, but when they occur we may have
+  /// many in a single block.
+  void processUndefReads(MachineBasicBlock *);
+};
+
+} // namespace llvm
+
+#define DEBUG_TYPE "break-false-deps"
+
+char BreakFalseDeps::ID = 0;
+INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false)
+INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
+INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false)
+
+FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); }
+
+bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
+  unsigned Pref) {
+  MachineOperand &MO = MI->getOperand(OpIdx);
+  assert(MO.isUndef() && "Expected undef machine operand");
+
+  unsigned OriginalReg = MO.getReg();
+
+  // Update only undef operands that have reg units that are mapped to one root.
+  for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) {
+    unsigned NumRoots = 0;
+    for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) {
+      NumRoots++;
+      if (NumRoots > 1)
+        return false;
+    }
+  }
+
+  // Get the undef operand's register class
+  const TargetRegisterClass *OpRC =
+    TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
+
+  // If the instruction has a true dependency, we can hide the false dependency
+  // behind it.
+  for (MachineOperand &CurrMO : MI->operands()) {
+    if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
+      !OpRC->contains(CurrMO.getReg()))
+      continue;
+    // We found a true dependency - replace the undef register with the true
+    // dependency.
+    MO.setReg(CurrMO.getReg());
+    return true;
+  }
+
+  // Go over all registers in the register class and find the register with
+  // max clearance or clearance higher than Pref.
+  unsigned MaxClearance = 0;
+  unsigned MaxClearanceReg = OriginalReg;
+  ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);
+  for (MCPhysReg Reg : Order) {
+    unsigned Clearance = RDA->getClearance(MI, Reg);
+    if (Clearance <= MaxClearance)
+      continue;
+    MaxClearance = Clearance;
+    MaxClearanceReg = Reg;
+
+    if (MaxClearance > Pref)
+      break;
+  }
+
+  // Update the operand if we found a register with better clearance.
+  if (MaxClearanceReg != OriginalReg)
+    MO.setReg(MaxClearanceReg);
+
+  return false;
+}
+
+bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
+  unsigned Pref) {
+  unsigned reg = MI->getOperand(OpIdx).getReg();
+  unsigned Clearance = RDA->getClearance(MI, reg);
+  LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+
+  if (Pref > Clearance) {
+    LLVM_DEBUG(dbgs() << ": Break dependency.\n");
+    return true;
+  }
+  LLVM_DEBUG(dbgs() << ": OK .\n");
+  return false;
+}
+
+void BreakFalseDeps::processDefs(MachineInstr *MI) {
+  assert(!MI->isDebugInstr() && "Won't process debug values");
+
+  // Break dependence on undef uses. Do this before updating LiveRegs below.
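  // The canonical case this handles, using x86 for illustration: cvtsi2ss
  // writes only the low lanes of its xmm destination, so an undef read of that
  // register stalls on whatever wrote it last. When no register with enough
  // clearance can be found, the target hook inserts a zeroing idiom that the
  // hardware recognizes as dependency-free:
  //
  //   cvtsi2ss %edx, %xmm0          xorps    %xmm0, %xmm0  ; breaks the dep
  //                           =>    cvtsi2ss %edx, %xmm0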
+  unsigned OpNum; +  unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); +  if (Pref) { +    bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref); +    // We don't need to bother trying to break a dependency if this +    // instruction has a true dependency on that register through another +    // operand - we'll have to wait for it to be available regardless. +    if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref)) +      UndefReads.push_back(std::make_pair(MI, OpNum)); +  } + +  const MCInstrDesc &MCID = MI->getDesc(); +  for (unsigned i = 0, +    e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); +    i != e; ++i) { +    MachineOperand &MO = MI->getOperand(i); +    if (!MO.isReg() || !MO.getReg()) +      continue; +    if (MO.isUse()) +      continue; +    // Check clearance before partial register updates. +    unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI); +    if (Pref && shouldBreakDependence(MI, i, Pref)) +      TII->breakPartialRegDependency(*MI, i, TRI); +  } +} + +void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { +  if (UndefReads.empty()) +    return; + +  // Collect this block's live out register units. +  LiveRegSet.init(*TRI); +  // We do not need to care about pristine registers as they are just preserved +  // but not actually used in the function. +  LiveRegSet.addLiveOutsNoPristines(*MBB); + +  MachineInstr *UndefMI = UndefReads.back().first; +  unsigned OpIdx = UndefReads.back().second; + +  for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { +    // Update liveness, including the current instruction's defs. +    LiveRegSet.stepBackward(I); + +    if (UndefMI == &I) { +      if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) +        TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI); + +      UndefReads.pop_back(); +      if (UndefReads.empty()) +        return; + +      UndefMI = UndefReads.back().first; +      OpIdx = UndefReads.back().second; +    } +  } +} + +void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) { +  UndefReads.clear(); +  // If this block is not done, it makes little sense to make any decisions +  // based on clearance information. We need to make a second pass anyway, +  // and by then we'll have better information, so we can avoid doing the work +  // to try and break dependencies now. +  for (MachineInstr &MI : *MBB) { +    if (!MI.isDebugInstr()) +      processDefs(&MI); +  } +  processUndefReads(MBB); +} + +bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) { +  if (skipFunction(mf.getFunction())) +    return false; +  MF = &mf; +  TII = MF->getSubtarget().getInstrInfo(); +  TRI = MF->getSubtarget().getRegisterInfo(); +  RDA = &getAnalysis<ReachingDefAnalysis>(); + +  RegClassInfo.runOnMachineFunction(mf); + +  LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n"); + +  // Traverse the basic blocks. +  for (MachineBasicBlock &MBB : mf) { +    processBasicBlock(&MBB); +  } + +  return false; +} diff --git a/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp new file mode 100644 index 000000000000..00ebf63fc174 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -0,0 +1,326 @@ +//===------ CFIInstrInserter.cpp - Insert additional CFI instructions -----===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass verifies incoming and outgoing CFA information of basic
+/// blocks. CFA information is information about offset and register set by CFI
+/// directives, valid at the start and end of a basic block. This pass checks
+/// that outgoing information of predecessors matches incoming information of
+/// their successors. Then it checks if blocks have correct CFA calculation rule
+/// set and inserts additional CFI instructions at their beginnings if they
+/// don't. CFI instructions are inserted if basic blocks have incorrect offset
+/// or register set by previous blocks, as a result of a non-linear layout of
+/// blocks in a function.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+static cl::opt<bool> VerifyCFI("verify-cfiinstrs",
+    cl::desc("Verify Call Frame Information instructions"),
+    cl::init(false),
+    cl::Hidden);
+
+namespace {
+class CFIInstrInserter : public MachineFunctionPass {
+ public:
+  static char ID;
+
+  CFIInstrInserter() : MachineFunctionPass(ID) {
+    initializeCFIInstrInserterPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (!MF.getMMI().hasDebugInfo() &&
+        !MF.getFunction().needsUnwindTableEntry())
+      return false;
+
+    MBBVector.resize(MF.getNumBlockIDs());
+    calculateCFAInfo(MF);
+
+    if (VerifyCFI) {
+      if (unsigned ErrorNum = verify(MF))
+        report_fatal_error("Found " + Twine(ErrorNum) +
+                           " in/out CFI information errors.");
+    }
+    bool insertedCFI = insertCFIInstrs(MF);
+    MBBVector.clear();
+    return insertedCFI;
+  }
+
+ private:
+  struct MBBCFAInfo {
+    MachineBasicBlock *MBB;
+    /// Value of cfa offset valid at basic block entry.
+    int IncomingCFAOffset = -1;
+    /// Value of cfa offset valid at basic block exit.
+    int OutgoingCFAOffset = -1;
+    /// Value of cfa register valid at basic block entry.
+    unsigned IncomingCFARegister = 0;
+    /// Value of cfa register valid at basic block exit.
+    unsigned OutgoingCFARegister = 0;
+    /// Whether in/out cfa offset and register values for this block have
+    /// already been set.
+    bool Processed = false;
+  };
+
+  /// Contains cfa offset and register values valid at entry and exit of basic
+  /// blocks.
+  std::vector<MBBCFAInfo> MBBVector;
+
+  /// Calculate cfa offset and register values valid at entry and exit for all
+  /// basic blocks in a function.
+  void calculateCFAInfo(MachineFunction &MF);
+  /// Calculate cfa offset and register values valid at basic block exit by
+  /// checking the block for CFI instructions. Block's incoming CFA info remains
+  /// the same.
+  void calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo);
+  /// Update in/out cfa offset and register values for successors of the basic
+  /// block.
+  void updateSuccCFAInfo(MBBCFAInfo &MBBInfo);
+
+  /// Check if incoming CFA information of a basic block matches outgoing CFA
+  /// information of the previous block. If it doesn't, insert CFI instruction
+  /// at the beginning of the block that corrects the CFA calculation rule for
+  /// that block.
+  bool insertCFIInstrs(MachineFunction &MF);
+  /// Return the cfa offset value that should be set at the beginning of a MBB
+  /// if needed. The negated value is needed when creating CFI instructions that
+  /// set absolute offset.
+  int getCorrectCFAOffset(MachineBasicBlock *MBB) {
+    return -MBBVector[MBB->getNumber()].IncomingCFAOffset;
+  }
+
+  void report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ);
+  /// Go through each MBB in a function and check that outgoing offset and
+  /// register of its predecessors match incoming offset and register of that
+  /// MBB, as well as that incoming offset and register of its successors match
+  /// outgoing offset and register of the MBB.
+  unsigned verify(MachineFunction &MF);
+};
+}  // namespace
+
+char CFIInstrInserter::ID = 0;
+INITIALIZE_PASS(CFIInstrInserter, "cfi-instr-inserter",
+                "Check CFA info and insert CFI instructions if needed", false,
+                false)
+FunctionPass *llvm::createCFIInstrInserter() { return new CFIInstrInserter(); }
+
+void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
+  // Initial CFA offset value i.e. the one valid at the beginning of the
+  // function.
+  int InitialOffset =
+      MF.getSubtarget().getFrameLowering()->getInitialCFAOffset(MF);
+  // Initial CFA register value i.e. the one valid at the beginning of the
+  // function.
+  unsigned InitialRegister =
+      MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF);
+
+  // Initialize MBBVector.
+  for (MachineBasicBlock &MBB : MF) {
+    MBBCFAInfo MBBInfo;
+    MBBInfo.MBB = &MBB;
+    MBBInfo.IncomingCFAOffset = InitialOffset;
+    MBBInfo.OutgoingCFAOffset = InitialOffset;
+    MBBInfo.IncomingCFARegister = InitialRegister;
+    MBBInfo.OutgoingCFARegister = InitialRegister;
+    MBBVector[MBB.getNumber()] = MBBInfo;
+  }
+
+  // Set in/out cfa info for all blocks in the function. This traversal is based
+  // on the assumption that the first block in the function is the entry block
+  // i.e. that it has initial cfa offset and register values as incoming CFA
+  // information.
+  for (MachineBasicBlock &MBB : MF) {
+    if (MBBVector[MBB.getNumber()].Processed) continue;
+    updateSuccCFAInfo(MBBVector[MBB.getNumber()]);
+  }
+}
+
+void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
+  // Outgoing cfa offset set by the block.
+  int SetOffset = MBBInfo.IncomingCFAOffset;
+  // Outgoing cfa register set by the block.
+  unsigned SetRegister = MBBInfo.IncomingCFARegister;
+  const std::vector<MCCFIInstruction> &Instrs =
+      MBBInfo.MBB->getParent()->getFrameInstructions();
+
+  // Determine cfa offset and register set by the block.
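  // E.g. a prologue block carrying
  //
  //     .cfi_def_cfa_offset 16
  //     .cfi_def_cfa_register 6     ; DWARF register numbers, target-specific
  //
  // ends with OutgoingCFAOffset = 16 and OutgoingCFARegister = 6, and those
  // values become the incoming CFA info of every successor in updateSuccCFAInfo.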
+  for (MachineInstr &MI : *MBBInfo.MBB) { +    if (MI.isCFIInstruction()) { +      unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); +      const MCCFIInstruction &CFI = Instrs[CFIIndex]; +      switch (CFI.getOperation()) { +      case MCCFIInstruction::OpDefCfaRegister: +        SetRegister = CFI.getRegister(); +        break; +      case MCCFIInstruction::OpDefCfaOffset: +        SetOffset = CFI.getOffset(); +        break; +      case MCCFIInstruction::OpAdjustCfaOffset: +        SetOffset += CFI.getOffset(); +        break; +      case MCCFIInstruction::OpDefCfa: +        SetRegister = CFI.getRegister(); +        SetOffset = CFI.getOffset(); +        break; +      case MCCFIInstruction::OpRememberState: +        // TODO: Add support for handling cfi_remember_state. +#ifndef NDEBUG +        report_fatal_error( +            "Support for cfi_remember_state not implemented! Value of CFA " +            "may be incorrect!\n"); +#endif +        break; +      case MCCFIInstruction::OpRestoreState: +        // TODO: Add support for handling cfi_restore_state. +#ifndef NDEBUG +        report_fatal_error( +            "Support for cfi_restore_state not implemented! Value of CFA may " +            "be incorrect!\n"); +#endif +        break; +      // Other CFI directives do not affect CFA value. +      case MCCFIInstruction::OpSameValue: +      case MCCFIInstruction::OpOffset: +      case MCCFIInstruction::OpRelOffset: +      case MCCFIInstruction::OpEscape: +      case MCCFIInstruction::OpRestore: +      case MCCFIInstruction::OpUndefined: +      case MCCFIInstruction::OpRegister: +      case MCCFIInstruction::OpWindowSave: +      case MCCFIInstruction::OpGnuArgsSize: +        break; +      } +    } +  } + +  MBBInfo.Processed = true; + +  // Update outgoing CFA info. +  MBBInfo.OutgoingCFAOffset = SetOffset; +  MBBInfo.OutgoingCFARegister = SetRegister; +} + +void CFIInstrInserter::updateSuccCFAInfo(MBBCFAInfo &MBBInfo) { +  SmallVector<MachineBasicBlock *, 4> Stack; +  Stack.push_back(MBBInfo.MBB); + +  do { +    MachineBasicBlock *Current = Stack.pop_back_val(); +    MBBCFAInfo &CurrentInfo = MBBVector[Current->getNumber()]; +    if (CurrentInfo.Processed) +      continue; + +    calculateOutgoingCFAInfo(CurrentInfo); +    for (auto *Succ : CurrentInfo.MBB->successors()) { +      MBBCFAInfo &SuccInfo = MBBVector[Succ->getNumber()]; +      if (!SuccInfo.Processed) { +        SuccInfo.IncomingCFAOffset = CurrentInfo.OutgoingCFAOffset; +        SuccInfo.IncomingCFARegister = CurrentInfo.OutgoingCFARegister; +        Stack.push_back(Succ); +      } +    } +  } while (!Stack.empty()); +} + +bool CFIInstrInserter::insertCFIInstrs(MachineFunction &MF) { +  const MBBCFAInfo *PrevMBBInfo = &MBBVector[MF.front().getNumber()]; +  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); +  bool InsertedCFIInstr = false; + +  for (MachineBasicBlock &MBB : MF) { +    // Skip the first MBB in a function +    if (MBB.getNumber() == MF.front().getNumber()) continue; + +    const MBBCFAInfo &MBBInfo = MBBVector[MBB.getNumber()]; +    auto MBBI = MBBInfo.MBB->begin(); +    DebugLoc DL = MBBInfo.MBB->findDebugLoc(MBBI); + +    if (PrevMBBInfo->OutgoingCFAOffset != MBBInfo.IncomingCFAOffset) { +      // If both outgoing offset and register of a previous block don't match +      // incoming offset and register of this block, add a def_cfa instruction +      // with the correct offset and register for this block. 
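      // For instance, if layout places a block that expects CFA = rsp+8
      // directly after a block whose outgoing rule is CFA = rbp+16 (say, a
      // hypothetical hot/cold split), a single def_cfa directive must open
      // the later block so the unwinder applies the right rule on that path.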
+      if (PrevMBBInfo->OutgoingCFARegister != MBBInfo.IncomingCFARegister) { +        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( +            nullptr, MBBInfo.IncomingCFARegister, getCorrectCFAOffset(&MBB))); +        BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) +            .addCFIIndex(CFIIndex); +        // If outgoing offset of a previous block doesn't match incoming offset +        // of this block, add a def_cfa_offset instruction with the correct +        // offset for this block. +      } else { +        unsigned CFIIndex = +            MF.addFrameInst(MCCFIInstruction::createDefCfaOffset( +                nullptr, getCorrectCFAOffset(&MBB))); +        BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) +            .addCFIIndex(CFIIndex); +      } +      InsertedCFIInstr = true; +      // If outgoing register of a previous block doesn't match incoming +      // register of this block, add a def_cfa_register instruction with the +      // correct register for this block. +    } else if (PrevMBBInfo->OutgoingCFARegister != +               MBBInfo.IncomingCFARegister) { +      unsigned CFIIndex = +          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( +              nullptr, MBBInfo.IncomingCFARegister)); +      BuildMI(*MBBInfo.MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) +          .addCFIIndex(CFIIndex); +      InsertedCFIInstr = true; +    } +    PrevMBBInfo = &MBBInfo; +  } +  return InsertedCFIInstr; +} + +void CFIInstrInserter::report(const MBBCFAInfo &Pred, const MBBCFAInfo &Succ) { +  errs() << "*** Inconsistent CFA register and/or offset between pred and succ " +            "***\n"; +  errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber() +         << " in " << Pred.MBB->getParent()->getName() +         << " outgoing CFA Reg:" << Pred.OutgoingCFARegister << "\n"; +  errs() << "Pred: " << Pred.MBB->getName() << " #" << Pred.MBB->getNumber() +         << " in " << Pred.MBB->getParent()->getName() +         << " outgoing CFA Offset:" << Pred.OutgoingCFAOffset << "\n"; +  errs() << "Succ: " << Succ.MBB->getName() << " #" << Succ.MBB->getNumber() +         << " incoming CFA Reg:" << Succ.IncomingCFARegister << "\n"; +  errs() << "Succ: " << Succ.MBB->getName() << " #" << Succ.MBB->getNumber() +         << " incoming CFA Offset:" << Succ.IncomingCFAOffset << "\n"; +} + +unsigned CFIInstrInserter::verify(MachineFunction &MF) { +  unsigned ErrorNum = 0; +  for (auto *CurrMBB : depth_first(&MF)) { +    const MBBCFAInfo &CurrMBBInfo = MBBVector[CurrMBB->getNumber()]; +    for (MachineBasicBlock *Succ : CurrMBB->successors()) { +      const MBBCFAInfo &SuccMBBInfo = MBBVector[Succ->getNumber()]; +      // Check that incoming offset and register values of successors match the +      // outgoing offset and register values of CurrMBB +      if (SuccMBBInfo.IncomingCFAOffset != CurrMBBInfo.OutgoingCFAOffset || +          SuccMBBInfo.IncomingCFARegister != CurrMBBInfo.OutgoingCFARegister) { +        report(CurrMBBInfo, SuccMBBInfo); +        ErrorNum++; +      } +    } +  } +  return ErrorNum; +} diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index b8920a601938..57541182cab2 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -35,8 +35,8 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,                             const MachineLoopInfo 
&MLI,                             const MachineBlockFrequencyInfo &MBFI,                             VirtRegAuxInfo::NormalizingFn norm) { -  DEBUG(dbgs() << "********** Compute Spill Weights **********\n" -               << "********** Function: " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n" +                    << "********** Function: " << MF.getName() << '\n');    MachineRegisterInfo &MRI = MF.getRegInfo();    VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm); @@ -236,7 +236,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,        continue;      numInstr++; -    if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) +    if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugInstr())        continue;      if (!visited.insert(mi).second)        continue; diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index 879cd2859ee9..2f845354c570 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -23,11 +23,14 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {    initializeAtomicExpandPass(Registry);    initializeBranchFolderPassPass(Registry);    initializeBranchRelaxationPass(Registry); +  initializeCFIInstrInserterPass(Registry);    initializeCodeGenPreparePass(Registry);    initializeDeadMachineInstructionElimPass(Registry);    initializeDetectDeadLanesPass(Registry);    initializeDwarfEHPreparePass(Registry);    initializeEarlyIfConverterPass(Registry); +  initializeEarlyMachineLICMPass(Registry); +  initializeEarlyTailDuplicatePass(Registry);    initializeExpandISelPseudosPass(Registry);    initializeExpandMemCmpPassPass(Registry);    initializeExpandPostRAPass(Registry); @@ -48,6 +51,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {    initializeLiveVariablesPass(Registry);    initializeLocalStackSlotPassPass(Registry);    initializeLowerIntrinsicsPass(Registry); +  initializeMIRCanonicalizerPass(Registry);    initializeMachineBlockFrequencyInfoPass(Registry);    initializeMachineBlockPlacementPass(Registry);    initializeMachineBlockPlacementStatsPass(Registry); @@ -74,12 +78,15 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {    initializePeepholeOptimizerPass(Registry);    initializePostMachineSchedulerPass(Registry);    initializePostRAHazardRecognizerPass(Registry); +  initializePostRAMachineSinkingPass(Registry);    initializePostRASchedulerPass(Registry);    initializePreISelIntrinsicLoweringLegacyPassPass(Registry);    initializeProcessImplicitDefsPass(Registry);    initializeRABasicPass(Registry); -  initializeRegAllocFastPass(Registry);    initializeRAGreedyPass(Registry); +  initializeRegAllocFastPass(Registry); +  initializeRegUsageInfoCollectorPass(Registry); +  initializeRegUsageInfoPropagationPass(Registry);    initializeRegisterCoalescerPass(Registry);    initializeRenameIndependentSubregsPass(Registry);    initializeSafeStackLegacyPassPass(Registry); @@ -90,7 +97,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {    initializeStackMapLivenessPass(Registry);    initializeStackProtectorPass(Registry);    initializeStackSlotColoringPass(Registry); -  initializeTailDuplicatePassPass(Registry); +  initializeTailDuplicatePass(Registry);    initializeTargetPassConfigPass(Registry);    initializeTwoAddressInstructionPassPass(Registry);    initializeUnpackMachineBundlesPass(Registry); @@ -98,9 +105,9 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {    
initializeUnreachableMachineBlockElimPass(Registry);    initializeVirtRegMapPass(Registry);    initializeVirtRegRewriterPass(Registry); +  initializeWasmEHPreparePass(Registry);    initializeWinEHPreparePass(Registry);    initializeXRayInstrumentationPass(Registry); -  initializeMIRCanonicalizerPass(Registry);  }  void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index 26ca8d4ee88c..c41beb094604 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -30,15 +30,16 @@  #include "llvm/Analysis/ProfileSummaryInfo.h"  #include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/CodeGen/Analysis.h"  #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/SelectionDAGNodes.h"  #include "llvm/CodeGen/TargetLowering.h"  #include "llvm/CodeGen/TargetPassConfig.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/Argument.h"  #include "llvm/IR/Attributes.h"  #include "llvm/IR/BasicBlock.h" @@ -79,13 +80,13 @@  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Target/TargetMachine.h"  #include "llvm/Target/TargetOptions.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/BypassSlowDivision.h" -#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/SimplifyLibCalls.h"  #include <algorithm>  #include <cassert> @@ -196,7 +197,7 @@ AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),                  cl::desc("Allow creation of Phis in Address sinking."));  static cl::opt<bool> -AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(false), +AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),                     cl::desc("Allow creation of selects in Address sinking."));  static cl::opt<bool> AddrSinkCombineBaseReg( @@ -215,6 +216,11 @@ static cl::opt<bool> AddrSinkCombineScaledReg(      "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),      cl::desc("Allow combining of ScaledReg field in Address sinking.")); +static cl::opt<bool> +    EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, +                         cl::init(true), +                         cl::desc("Enable splitting large offset of GEP.")); +  namespace {  using SetOfInstrs = SmallPtrSet<Instruction *, 16>; @@ -260,6 +266,20 @@ class TypePromotionTransaction;      /// Keep track of sext chains based on their initial value.      DenseMap<Value *, Instruction *> SeenChainsForSExt; +    /// Keep track of GEPs accessing the same data structures such as structs or +    /// arrays that are candidates to be split later because of their large +    /// size. +    DenseMap< +        AssertingVH<Value>, +        SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>> +        LargeOffsetGEPMap; + +    /// Keep track of new GEP base after splitting the GEPs having large offset. +    SmallSet<AssertingVH<Value>, 2> NewGEPBases; + +    /// Map serial numbers to Large offset GEPs. 
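    /// These structures feed splitLargeGEPOffsets, which rebases GEPs that
    /// share a pointer but carry immediates too large to fold into an
    /// addressing mode. Illustrative IR (offsets chosen arbitrarily):
    ///
    ///   %p1 = getelementptr i8, i8* %base, i64 90000
    ///   %p2 = getelementptr i8, i8* %base, i64 90004
    /// becomes
    ///   %new = getelementptr i8, i8* %base, i64 90000  ; large constant once
    ///   %p1  = getelementptr i8, i8* %new, i64 0
    ///   %p2  = getelementptr i8, i8* %new, i64 4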
+    DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID; +      /// Keep track of SExt promoted.      ValueToSExts ValToSExtendedUses; @@ -301,16 +321,16 @@ class TypePromotionTransaction;                                         bool isPreheader);      bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);      bool optimizeInst(Instruction *I, bool &ModifiedDT); -    bool optimizeMemoryInst(Instruction *I, Value *Addr, -                            Type *AccessTy, unsigned AS); +    bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, +                            Type *AccessTy, unsigned AddrSpace);      bool optimizeInlineAsmInst(CallInst *CS);      bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);      bool optimizeExt(Instruction *&I);      bool optimizeExtUses(Instruction *I); -    bool optimizeLoadExt(LoadInst *I); +    bool optimizeLoadExt(LoadInst *Load);      bool optimizeSelectInst(SelectInst *SI); -    bool optimizeShuffleVectorInst(ShuffleVectorInst *SI); -    bool optimizeSwitchInst(SwitchInst *CI); +    bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); +    bool optimizeSwitchInst(SwitchInst *SI);      bool optimizeExtractElementInst(Instruction *Inst);      bool dupRetToEnableTailCallOpts(BasicBlock *BB);      bool placeDbgValues(Function &F); @@ -321,6 +341,7 @@ class TypePromotionTransaction;                            SmallVectorImpl<Instruction *> &ProfitablyMovedExts,                            unsigned CreatedInstsCost = 0);      bool mergeSExts(Function &F); +    bool splitLargeGEPOffsets();      bool performAddressTypePromotion(          Instruction *&Inst,          bool AllowPromotionWithoutCommonHeader, @@ -414,6 +435,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {      SeenChainsForSExt.clear();      ValToSExtendedUses.clear();      RemovedInsts.clear(); +    LargeOffsetGEPMap.clear(); +    LargeOffsetGEPID.clear();      for (Function::iterator I = F.begin(); I != F.end(); ) {        BasicBlock *BB = &*I++;        bool ModifiedDTOnIteration = false; @@ -425,6 +448,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {      }      if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())        MadeChange |= mergeSExts(F); +    if (!LargeOffsetGEPMap.empty()) +      MadeChange |= splitLargeGEPOffsets();      // Really free removed instructions during promotion.      for (Instruction *I : RemovedInsts) @@ -437,7 +462,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) {    if (!DisableBranchOpts) {      MadeChange = false; -    SmallPtrSet<BasicBlock*, 8> WorkList; +    // Use a set vector to get deterministic iteration order. The order the +    // blocks are removed may affect whether or not PHI nodes in successors +    // are removed. +    SmallSetVector<BasicBlock*, 8> WorkList;      for (BasicBlock &BB : F) {        SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));        MadeChange |= ConstantFoldTerminator(&BB, true); @@ -452,8 +480,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {      // Delete the dead blocks and any of their dead successors.      
MadeChange |= !WorkList.empty();      while (!WorkList.empty()) { -      BasicBlock *BB = *WorkList.begin(); -      WorkList.erase(BB); +      BasicBlock *BB = WorkList.pop_back_val();        SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));        DeleteDeadBlock(BB); @@ -491,8 +518,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) {  bool CodeGenPrepare::eliminateFallThrough(Function &F) {    bool Changed = false;    // Scan all of the blocks in the function, except for the entry block. -  for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { -    BasicBlock *BB = &*I++; +  // Use a temporary array to avoid iterator being invalidated when +  // deleting blocks. +  SmallVector<WeakTrackingVH, 16> Blocks; +  for (auto &Block : llvm::make_range(std::next(F.begin()), F.end())) +    Blocks.push_back(&Block); + +  for (auto &Block : Blocks) { +    auto *BB = cast_or_null<BasicBlock>(Block); +    if (!BB) +      continue;      // If the destination block has a single pred, then this is a trivial      // edge, just collapse it.      BasicBlock *SinglePred = BB->getSinglePredecessor(); @@ -503,17 +538,10 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {      BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());      if (Term && !Term->isConditional()) {        Changed = true; -      DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); -      // Remember if SinglePred was the entry block of the function. -      // If so, we will need to move BB back to the entry position. -      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); -      MergeBasicBlockIntoOnlyPred(BB, nullptr); - -      if (isEntry && BB != &BB->getParent()->getEntryBlock()) -        BB->moveBefore(&BB->getParent()->getEntryBlock()); +      LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n"); -      // We have erased a block. Update the iterator. -      I = BB->getIterator(); +      // Merge BB into SinglePred and delete it. +      MergeBlockIntoPredecessor(BB);      }    }    return Changed; @@ -566,9 +594,17 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {    }    bool MadeChange = false; +  // Copy blocks into a temporary array to avoid iterator invalidation issues +  // as we remove them.    // Note that this intentionally skips the entry block. -  for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { -    BasicBlock *BB = &*I++; +  SmallVector<WeakTrackingVH, 16> Blocks; +  for (auto &Block : llvm::make_range(std::next(F.begin()), F.end())) +    Blocks.push_back(&Block); + +  for (auto &Block : Blocks) { +    BasicBlock *BB = cast_or_null<BasicBlock>(Block); +    if (!BB) +      continue;      BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);      if (!DestBB ||          !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB))) @@ -730,21 +766,20 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {    BranchInst *BI = cast<BranchInst>(BB->getTerminator());    BasicBlock *DestBB = BI->getSuccessor(0); -  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); +  LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" +                    << *BB << *DestBB);    // If the destination block has a single pred, then this is a trivial edge,    // just collapse it.    if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {      if (SinglePred != DestBB) { -      // Remember if SinglePred was the entry block of the function.  
If so, we -      // will need to move BB back to the entry position. -      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); -      MergeBasicBlockIntoOnlyPred(DestBB, nullptr); - -      if (isEntry && BB != &BB->getParent()->getEntryBlock()) -        BB->moveBefore(&BB->getParent()->getEntryBlock()); - -      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); +      assert(SinglePred == BB && +             "Single predecessor not the same as predecessor"); +      // Merge DestBB into SinglePred/BB and delete it. +      MergeBlockIntoPredecessor(DestBB); +      // Note: BB(=SinglePred) will not be deleted on this path. +      // DestBB(=its single successor) is the one that was deleted. +      LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");        return;      }    } @@ -782,7 +817,7 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {    BB->eraseFromParent();    ++NumBlocksElim; -  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); +  LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");  }  // Computes a map of base pointer relocation instructions to corresponding @@ -1024,6 +1059,7 @@ static bool SinkCast(CastInst *CI) {        assert(InsertPt != UserBB->end());        InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),                                        CI->getType(), "", &*InsertPt); +      InsertedCast->setDebugLoc(CI->getDebugLoc());      }      // Replace a use of the cast with a use of the new cast. @@ -1247,8 +1283,8 @@ static bool sinkAndCmp0Expression(Instruction *AndI,    if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))      return false; -  DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n"); -  DEBUG(AndI->getParent()->dump()); +  LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n"); +  LLVM_DEBUG(AndI->getParent()->dump());    // Push the 'and' into the same block as the icmp 0.  There should only be    // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any @@ -1261,7 +1297,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI,      // Preincrement use iterator so we don't invalidate it.      ++UI; -    DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n"); +    LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");      // Keep the 'and' in the same place if the use is already in the same block.      Instruction *InsertPt = @@ -1275,7 +1311,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI,      // Replace a use of the 'and' with a use of the new 'and'.      TheUse = InsertedAnd;      ++NumAndUses; -    DEBUG(User->getParent()->dump()); +    LLVM_DEBUG(User->getParent()->dump());    }    // We removed all uses, nuke the and. @@ -1388,7 +1424,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,  ///   %x.extract.shift.1 = lshr i64 %arg1, 32  ///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16  /// -/// CodeGen will recoginze the pattern in BB2 and generate BitExtract +/// CodeGen will recognize the pattern in BB2 and generate BitExtract  /// instruction.  /// Return true if any changes are made.  
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, @@ -1434,7 +1470,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,        // cmp i16 trunc.result, opnd2        //        if (isa<TruncInst>(User) && shiftIsLegal -          // If the type of the truncate is legal, no trucate will be +          // If the type of the truncate is legal, no truncate will be            // introduced in other basic blocks.            &&            (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType())))) @@ -1581,7 +1617,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {        // if size - offset meets the size threshold.        if (!Arg->getType()->isPointerTy())          continue; -      APInt Offset(DL->getPointerSizeInBits( +      APInt Offset(DL->getIndexSizeInBits(                         cast<PointerType>(Arg->getType())->getAddressSpace()),                     0);        Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); @@ -1606,11 +1642,14 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {      // If this is a memcpy (or similar) then we may be able to improve the      // alignment      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { -      unsigned Align = getKnownAlignment(MI->getDest(), *DL); -      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) -        Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL)); -      if (Align > MI->getAlignment()) -        MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); +      unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL); +      if (DestAlign > MI->getDestAlignment()) +        MI->setDestAlignment(DestAlign); +      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { +        unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL); +        if (SrcAlign > MTI->getSourceAlignment()) +          MTI->setSourceAlignment(SrcAlign); +      }      }    } @@ -1664,7 +1703,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {        InsertedInsts.insert(ExtVal);        return true;      } -    case Intrinsic::invariant_group_barrier: +    case Intrinsic::launder_invariant_group: +    case Intrinsic::strip_invariant_group:        II->replaceAllUsesWith(II->getArgOperand(0));        II->eraseFromParent();        return true; @@ -2018,11 +2058,11 @@ LLVM_DUMP_METHOD void ExtAddrMode::dump() const {  namespace { -/// \brief This class provides transaction based operation on the IR. +/// This class provides transaction based operation on the IR.  /// Every change made through this class is recorded in the internal state and  /// can be undone (rollback) until commit is called.  class TypePromotionTransaction { -  /// \brief This represents the common interface of the individual transaction. +  /// This represents the common interface of the individual transaction.    /// Each class implements the logic for doing one specific modification on    /// the IR via the TypePromotionTransaction.    class TypePromotionAction { @@ -2031,20 +2071,20 @@ class TypePromotionTransaction {      Instruction *Inst;    public: -    /// \brief Constructor of the action. +    /// Constructor of the action.      /// The constructor performs the related action on the IR.      TypePromotionAction(Instruction *Inst) : Inst(Inst) {}      virtual ~TypePromotionAction() = default; -    /// \brief Undo the modification done by this action. +    /// Undo the modification done by this action. 
     /// When this method is called, the IR must be in the same state as it was      /// before this action was applied.      /// \pre Undoing the action works if and only if the IR is in the exact same      /// state as it was directly after this action was applied.      virtual void undo() = 0; -    /// \brief Advocate every change made by this action. +    /// Advocate every change made by this action.      /// When the results on the IR of the action are to be kept, it is important      /// to call this function, otherwise hidden information may be kept forever.      virtual void commit() { @@ -2052,12 +2092,12 @@ class TypePromotionTransaction {      }    }; -  /// \brief Utility to remember the position of an instruction. +  /// Utility to remember the position of an instruction.    class InsertionHandler {      /// Position of an instruction.      /// Either an instruction:      /// - Is the first in a basic block: BB is used. -    /// - Has a previous instructon: PrevInst is used. +    /// - Has a previous instruction: PrevInst is used.      union {        Instruction *PrevInst;        BasicBlock *BB; @@ -2067,7 +2107,7 @@ class TypePromotionTransaction {      bool HasPrevInstruction;    public: -    /// \brief Record the position of \p Inst. +    /// Record the position of \p Inst.      InsertionHandler(Instruction *Inst) {        BasicBlock::iterator It = Inst->getIterator();        HasPrevInstruction = (It != (Inst->getParent()->begin())); @@ -2077,7 +2117,7 @@ class TypePromotionTransaction {          Point.BB = Inst->getParent();      } -    /// \brief Insert \p Inst at the recorded position. +    /// Insert \p Inst at the recorded position.      void insert(Instruction *Inst) {        if (HasPrevInstruction) {          if (Inst->getParent()) @@ -2093,27 +2133,28 @@ class TypePromotionTransaction {      }    }; -  /// \brief Move an instruction before another. +  /// Move an instruction before another.    class InstructionMoveBefore : public TypePromotionAction {      /// Original position of the instruction.      InsertionHandler Position;    public: -    /// \brief Move \p Inst before \p Before. +    /// Move \p Inst before \p Before.      InstructionMoveBefore(Instruction *Inst, Instruction *Before)          : TypePromotionAction(Inst), Position(Inst) { -      DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n"); +      LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before +                        << "\n");        Inst->moveBefore(Before);      } -    /// \brief Move the instruction back to its original position. +    /// Move the instruction back to its original position.      void undo() override { -      DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); +      LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");        Position.insert(Inst);      }    }; -  /// \brief Set the operand of an instruction with a new value. +  /// Set the operand of an instruction with a new value.    class OperandSetter : public TypePromotionAction {      /// Original operand of the instruction.      Value *Origin; @@ -2122,35 +2163,35 @@ class TypePromotionTransaction {      unsigned Idx;    public: -    /// \brief Set \p Idx operand of \p Inst with \p NewVal. +    /// Set \p Idx operand of \p Inst with \p NewVal.      
OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)          : TypePromotionAction(Inst), Idx(Idx) { -      DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" -                   << "for:" << *Inst << "\n" -                   << "with:" << *NewVal << "\n"); +      LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" +                        << "for:" << *Inst << "\n" +                        << "with:" << *NewVal << "\n");        Origin = Inst->getOperand(Idx);        Inst->setOperand(Idx, NewVal);      } -    /// \brief Restore the original value of the instruction. +    /// Restore the original value of the instruction.      void undo() override { -      DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" -                   << "for: " << *Inst << "\n" -                   << "with: " << *Origin << "\n"); +      LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" +                        << "for: " << *Inst << "\n" +                        << "with: " << *Origin << "\n");        Inst->setOperand(Idx, Origin);      }    }; -  /// \brief Hide the operands of an instruction. +  /// Hide the operands of an instruction.    /// Do as if this instruction was not using any of its operands.    class OperandsHider : public TypePromotionAction {      /// The list of original operands.      SmallVector<Value *, 4> OriginalValues;    public: -    /// \brief Remove \p Inst from the uses of the operands of \p Inst. +    /// Remove \p Inst from the uses of the operands of \p Inst.      OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { -      DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); +      LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");        unsigned NumOpnds = Inst->getNumOperands();        OriginalValues.reserve(NumOpnds);        for (unsigned It = 0; It < NumOpnds; ++It) { @@ -2164,114 +2205,114 @@ class TypePromotionTransaction {        }      } -    /// \brief Restore the original list of uses. +    /// Restore the original list of uses.      void undo() override { -      DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); +      LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");        for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)          Inst->setOperand(It, OriginalValues[It]);      }    }; -  /// \brief Build a truncate instruction. +  /// Build a truncate instruction.    class TruncBuilder : public TypePromotionAction {      Value *Val;    public: -    /// \brief Build a truncate instruction of \p Opnd producing a \p Ty +    /// Build a truncate instruction of \p Opnd producing a \p Ty      /// result.      /// trunc Opnd to Ty.      TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {        IRBuilder<> Builder(Opnd);        Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); -      DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); +      LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");      } -    /// \brief Get the built value. +    /// Get the built value.      Value *getBuiltValue() { return Val; } -    /// \brief Remove the built instruction. +    /// Remove the built instruction.      void undo() override { -      DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n"); +      LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");        if (Instruction *IVal = dyn_cast<Instruction>(Val))          IVal->eraseFromParent();      }    }; -  /// \brief Build a sign extension instruction. +  /// Build a sign extension instruction.    
class SExtBuilder : public TypePromotionAction {      Value *Val;    public: -    /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty +    /// Build a sign extension instruction of \p Opnd producing a \p Ty      /// result.      /// sext Opnd to Ty.      SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)          : TypePromotionAction(InsertPt) {        IRBuilder<> Builder(InsertPt);        Val = Builder.CreateSExt(Opnd, Ty, "promoted"); -      DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n"); +      LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");      } -    /// \brief Get the built value. +    /// Get the built value.      Value *getBuiltValue() { return Val; } -    /// \brief Remove the built instruction. +    /// Remove the built instruction.      void undo() override { -      DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n"); +      LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");        if (Instruction *IVal = dyn_cast<Instruction>(Val))          IVal->eraseFromParent();      }    }; -  /// \brief Build a zero extension instruction. +  /// Build a zero extension instruction.    class ZExtBuilder : public TypePromotionAction {      Value *Val;    public: -    /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty +    /// Build a zero extension instruction of \p Opnd producing a \p Ty      /// result.      /// zext Opnd to Ty.      ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)          : TypePromotionAction(InsertPt) {        IRBuilder<> Builder(InsertPt);        Val = Builder.CreateZExt(Opnd, Ty, "promoted"); -      DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); +      LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");      } -    /// \brief Get the built value. +    /// Get the built value.      Value *getBuiltValue() { return Val; } -    /// \brief Remove the built instruction. +    /// Remove the built instruction.      void undo() override { -      DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"); +      LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");        if (Instruction *IVal = dyn_cast<Instruction>(Val))          IVal->eraseFromParent();      }    }; -  /// \brief Mutate an instruction to another type. +  /// Mutate an instruction to another type.    class TypeMutator : public TypePromotionAction {      /// Record the original type.      Type *OrigTy;    public: -    /// \brief Mutate the type of \p Inst into \p NewTy. +    /// Mutate the type of \p Inst into \p NewTy.      TypeMutator(Instruction *Inst, Type *NewTy)          : TypePromotionAction(Inst), OrigTy(Inst->getType()) { -      DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy -                   << "\n"); +      LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy +                        << "\n");        Inst->mutateType(NewTy);      } -    /// \brief Mutate the instruction back to its original type. +    /// Mutate the instruction back to its original type.      void undo() override { -      DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy -                   << "\n"); +      LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy +                        << "\n");        Inst->mutateType(OrigTy);      }    }; -  /// \brief Replace the uses of an instruction by another instruction. +  /// Replace the uses of an instruction by another instruction.    
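
A detail worth noting about the builder actions above: undo() erases the built value only after a dyn_cast<Instruction> succeeds, because IRBuilder folds constant operands, so CreateTrunc/CreateSExt/CreateZExt may hand back a Constant rather than a newly inserted instruction. A small illustrative check (standalone, not part of the patch):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Returns true: zext of a constant folds, so no ZExtInst is created and
    // there would be nothing to eraseFromParent() on undo.
    static bool zextFoldsToConstant(LLVMContext &Ctx) {
      IRBuilder<> Builder(Ctx);
      Value *V = Builder.CreateZExt(Builder.getInt8(42), Builder.getInt32Ty());
      return isa<ConstantInt>(V);
    }
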
class UsesReplacer : public TypePromotionAction {      /// Helper structure to keep track of the replaced uses.      struct InstructionAndIdx { @@ -2291,10 +2332,10 @@ class TypePromotionTransaction {      using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;    public: -    /// \brief Replace all the use of \p Inst by \p New. +    /// Replace all the use of \p Inst by \p New.      UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) { -      DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New -                   << "\n"); +      LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New +                        << "\n");        // Record the original uses.        for (Use &U : Inst->uses()) {          Instruction *UserI = cast<Instruction>(U.getUser()); @@ -2304,9 +2345,9 @@ class TypePromotionTransaction {        Inst->replaceAllUsesWith(New);      } -    /// \brief Reassign the original uses of Inst to Inst. +    /// Reassign the original uses of Inst to Inst.      void undo() override { -      DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); +      LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");        for (use_iterator UseIt = OriginalUses.begin(),                          EndIt = OriginalUses.end();             UseIt != EndIt; ++UseIt) { @@ -2315,7 +2356,7 @@ class TypePromotionTransaction {      }    }; -  /// \brief Remove an instruction from the IR. +  /// Remove an instruction from the IR.    class InstructionRemover : public TypePromotionAction {      /// Original position of the instruction.      InsertionHandler Inserter; @@ -2331,7 +2372,7 @@ class TypePromotionTransaction {      SetOfInstrs &RemovedInsts;    public: -    /// \brief Remove all reference of \p Inst and optinally replace all its +    /// Remove all reference of \p Inst and optionally replace all its      /// uses with New.      /// \p RemovedInsts Keep track of the instructions removed by this Action.      /// \pre If !Inst->use_empty(), then New != nullptr @@ -2341,7 +2382,7 @@ class TypePromotionTransaction {            RemovedInsts(RemovedInsts) {        if (New)          Replacer = new UsesReplacer(Inst, New); -      DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); +      LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");        RemovedInsts.insert(Inst);        /// The instructions removed here will be freed after completing        /// optimizeBlock() for all blocks as we need to keep track of the @@ -2351,10 +2392,10 @@ class TypePromotionTransaction {      ~InstructionRemover() override { delete Replacer; } -    /// \brief Resurrect the instruction and reassign it to the proper uses if +    /// Resurrect the instruction and reassign it to the proper uses if      /// new value was provided when build this action.      void undo() override { -      DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"); +      LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");        Inserter.insert(Inst);        if (Replacer)          Replacer->undo(); @@ -2496,7 +2537,7 @@ void TypePromotionTransaction::rollback(  namespace { -/// \brief A helper class for matching addressing modes. +/// A helper class for matching addressing modes.  ///  /// This encapsulates the logic for matching the target-legal addressing modes.  class AddressingModeMatcher { @@ -2524,22 +2565,23 @@ class AddressingModeMatcher {    /// The ongoing transaction where every action should be registered.    
TypePromotionTransaction &TPT; +  // A GEP which has too large offset to be folded into the addressing mode. +  std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP; +    /// This is set to true when we should not do profitability checks.    /// When true, IsProfitableToFoldIntoAddressingMode always returns true.    bool IgnoreProfitability; -  AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI, -                        const TargetLowering &TLI, -                        const TargetRegisterInfo &TRI, -                        Type *AT, unsigned AS, -                        Instruction *MI, ExtAddrMode &AM, -                        const SetOfInstrs &InsertedInsts, -                        InstrToOrigTy &PromotedInsts, -                        TypePromotionTransaction &TPT) +  AddressingModeMatcher( +      SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI, +      const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI, +      ExtAddrMode &AM, const SetOfInstrs &InsertedInsts, +      InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, +      std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)        : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),          DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),          MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), -        PromotedInsts(PromotedInsts), TPT(TPT) { +        PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {      IgnoreProfitability = false;    } @@ -2551,28 +2593,27 @@ public:    /// optimizations.    /// \p PromotedInsts maps the instructions to their type before promotion.    /// \p The ongoing transaction where every action should be registered. -  static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS, -                           Instruction *MemoryInst, -                           SmallVectorImpl<Instruction*> &AddrModeInsts, -                           const TargetLowering &TLI, -                           const TargetRegisterInfo &TRI, -                           const SetOfInstrs &InsertedInsts, -                           InstrToOrigTy &PromotedInsts, -                           TypePromotionTransaction &TPT) { +  static ExtAddrMode +  Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst, +        SmallVectorImpl<Instruction *> &AddrModeInsts, +        const TargetLowering &TLI, const TargetRegisterInfo &TRI, +        const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, +        TypePromotionTransaction &TPT, +        std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {      ExtAddrMode Result; -    bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, -                                         AccessTy, AS, +    bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,                                           MemoryInst, Result, InsertedInsts, -                                         PromotedInsts, TPT).matchAddr(V, 0); +                                         PromotedInsts, TPT, LargeOffsetGEP) +                       .matchAddr(V, 0);      (void)Success; assert(Success && "Couldn't select *anything*?");      return Result;    }  private:    bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); -  bool matchAddr(Value *V, unsigned Depth); -  bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, +  bool matchAddr(Value *Addr, unsigned Depth); +  bool matchOperationAddr(User *AddrInst, 
unsigned Opcode, unsigned Depth,                            bool *MovedAway = nullptr);    bool isProfitableToFoldIntoAddressingMode(Instruction *I,                                              ExtAddrMode &AMBefore, @@ -2582,20 +2623,21 @@ private:                               Value *PromotedOperand) const;  }; -/// \brief Keep track of simplification of Phi nodes. +/// Keep track of simplification of Phi nodes.  /// Accept the set of all phi nodes and erase phi node from this set  /// if it is simplified.  class SimplificationTracker {    DenseMap<Value *, Value *> Storage;    const SimplifyQuery &SQ; -  SmallPtrSetImpl<PHINode *> &AllPhiNodes; -  SmallPtrSetImpl<SelectInst *> &AllSelectNodes; +  // Tracks newly created Phi nodes. We use a SetVector to get deterministic +  // order when iterating over the set in MatchPhiSet. +  SmallSetVector<PHINode *, 32> AllPhiNodes; +  // Tracks newly created Select nodes. +  SmallPtrSet<SelectInst *, 32> AllSelectNodes;  public: -  SimplificationTracker(const SimplifyQuery &sq, -                        SmallPtrSetImpl<PHINode *> &APN, -                        SmallPtrSetImpl<SelectInst *> &ASN) -      : SQ(sq), AllPhiNodes(APN), AllSelectNodes(ASN) {} +  SimplificationTracker(const SimplifyQuery &sq) +      : SQ(sq) {}    Value *Get(Value *V) {      do { @@ -2621,7 +2663,7 @@ public:            Put(PI, V);            PI->replaceAllUsesWith(V);            if (auto *PHI = dyn_cast<PHINode>(PI)) -            AllPhiNodes.erase(PHI); +            AllPhiNodes.remove(PHI);            if (auto *Select = dyn_cast<SelectInst>(PI))              AllSelectNodes.erase(Select);            PI->eraseFromParent(); @@ -2633,9 +2675,48 @@ public:    void Put(Value *From, Value *To) {      Storage.insert({ From, To });    } + +  void ReplacePhi(PHINode *From, PHINode *To) { +    Value* OldReplacement = Get(From); +    while (OldReplacement != From) { +      From = To; +      To = dyn_cast<PHINode>(OldReplacement); +      OldReplacement = Get(From); +    } +    assert(Get(To) == To && "Replacement PHI node is already replaced."); +    Put(From, To); +    From->replaceAllUsesWith(To); +    AllPhiNodes.remove(From); +    From->eraseFromParent(); +  } + +  SmallSetVector<PHINode *, 32>& newPhiNodes() { return AllPhiNodes; } + +  void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); } + +  void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); } + +  unsigned countNewPhiNodes() const { return AllPhiNodes.size(); } + +  unsigned countNewSelectNodes() const { return AllSelectNodes.size(); } + +  void destroyNewNodes(Type *CommonType) { +    // For safe erasing, replace the uses with dummy value first. +    auto Dummy = UndefValue::get(CommonType); +    for (auto I : AllPhiNodes) { +      I->replaceAllUsesWith(Dummy); +      I->eraseFromParent(); +    } +    AllPhiNodes.clear(); +    for (auto I : AllSelectNodes) { +      I->replaceAllUsesWith(Dummy); +      I->eraseFromParent(); +    } +    AllSelectNodes.clear(); +  }  }; -/// \brief A helper class for combining addressing modes. +/// A helper class for combining addressing modes.  
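
The container changes in SimplificationTracker above (SmallPtrSet to SmallSetVector, erase() to remove()) are about iteration order, as the new comments note: a pointer-keyed set visits elements in an order derived from pointer values, which can vary from run to run, while a SetVector iterates in insertion order, keeping the emitted IR deterministic. An illustrative sketch:

    #include "llvm/ADT/SetVector.h"
    using namespace llvm;

    static void orderDemo(int *A, int *B, int *C) {
      SmallSetVector<int *, 4> Ordered;
      Ordered.insert(B);
      Ordered.insert(A);
      Ordered.insert(C);
      // Always visits B, A, C (insertion order); a SmallPtrSet would visit
      // them in a pointer-derived order that can change between runs.
      for (int *P : Ordered)
        (void)P;
      Ordered.remove(A); // SetVector spells removal remove(), not erase().
    }
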
 class AddressingModeCombiner {
   typedef std::pair<Value *, BasicBlock *> ValueInBB;
   typedef DenseMap<ValueInBB, Value *> FoldAddrToValueMapping;
@@ -2664,12 +2745,12 @@ public:
   AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue)
       : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}

-  /// \brief Get the combined AddrMode
+  /// Get the combined AddrMode
   const ExtAddrMode &getAddrMode() const {
     return AddrModes[0];
   }

-  /// \brief Add a new AddrMode if it's compatible with the AddrModes we already
+  /// Add a new AddrMode if it's compatible with the AddrModes we already
   /// have.
   /// \return True iff we succeeded in doing so.
   bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
@@ -2694,29 +2775,35 @@ public:
     else if (DifferentField != ThisDifferentField)
       DifferentField = ExtAddrMode::MultipleFields;

-    // If NewAddrMode differs in only one dimension, and that dimension isn't
-    // the amount that ScaledReg is scaled by, then we can handle it by
-    // inserting a phi/select later on. Even if NewAddMode is the same
-    // we still need to collect it due to original value is different.
-    // And later we will need all original values as anchors during
-    // finding the common Phi node.
+    // If NewAddrMode differs in more than one dimension we cannot handle it.
+    bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
+
+    // If the Scale field is different then we reject.
+    CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
+
     // We also must reject the case when base offset is different and
     // scale reg is not null, we cannot handle this case due to merge of
     // different offsets will be used as ScaleReg.
-    if (DifferentField != ExtAddrMode::MultipleFields &&
-        DifferentField != ExtAddrMode::ScaleField &&
-        (DifferentField != ExtAddrMode::BaseOffsField ||
-         !NewAddrMode.ScaledReg)) {
+    CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
+                              !NewAddrMode.ScaledReg);
+
+    // We also must reject the case when the GV is different and a BaseReg is
+    // set, because we want to use the base reg as a merge of the GV values.
+    CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
+                              !NewAddrMode.HasBaseReg);
+
+    // Even if NewAddrMode is the same we still need to collect it, because
+    // the original value is different, and later we will need all the
+    // original values as anchors when finding the common Phi node.
+    if (CanHandle)
       AddrModes.emplace_back(NewAddrMode);
-      return true;
-    }
+    else
+      AddrModes.clear();

-    // We couldn't combine NewAddrMode with the rest, so return failure.
-    AddrModes.clear();
-    return false;
+    return CanHandle;
   }

-  /// \brief Combine the addressing modes we've collected into a single
+  /// Combine the addressing modes we've collected into a single
   /// addressing mode.
   /// \return True iff we successfully combined them or we only had one so
   /// didn't need to combine them anyway.
@@ -2751,7 +2838,7 @@ public:
   }

 private:
-  /// \brief Initialize Map with anchor values. For address seen in some BB
+  /// Initialize Map with anchor values. For address seen in some BB
   /// we set the value of different field saw in this address.
   /// If address is not an instruction than basic block is set to null.
/// At the same time we find a common type for different field we will @@ -2784,9 +2871,9 @@ private:      return true;    } -  /// \brief We have mapping between value A and basic block where value A +  /// We have mapping between value A and basic block where value A    /// seen to other value B where B was a field in addressing mode represented -  /// by A. Also we have an original value C representin an address in some +  /// by A. Also we have an original value C representing an address in some    /// basic block. Traversing from C through phi and selects we ended up with    /// A's in a map. This utility function tries to find a value V which is a    /// field in addressing mode C and traversing through phi nodes and selects @@ -2809,62 +2896,46 @@ private:    //   <p, BB3> -> ?    // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3    Value *findCommon(FoldAddrToValueMapping &Map) { -    // Tracks of new created Phi nodes. -    SmallPtrSet<PHINode *, 32> NewPhiNodes; -    // Tracks of new created Select nodes. -    SmallPtrSet<SelectInst *, 32> NewSelectNodes; -    // Tracks the simplification of new created phi nodes. The reason we use +    // Tracks the simplification of newly created phi nodes. The reason we use      // this mapping is because we will add new created Phi nodes in AddrToBase.      // Simplification of Phi nodes is recursive, so some Phi node may      // be simplified after we added it to AddrToBase.      // Using this mapping we can find the current value in AddrToBase. -    SimplificationTracker ST(SQ, NewPhiNodes, NewSelectNodes); +    SimplificationTracker ST(SQ);      // First step, DFS to create PHI nodes for all intermediate blocks.      // Also fill traverse order for the second step.      SmallVector<ValueInBB, 32> TraverseOrder; -    InsertPlaceholders(Map, TraverseOrder, NewPhiNodes, NewSelectNodes); +    InsertPlaceholders(Map, TraverseOrder, ST);      // Second Step, fill new nodes by merged values and simplify if possible.      FillPlaceholders(Map, TraverseOrder, ST); -    if (!AddrSinkNewSelects && NewSelectNodes.size() > 0) { -      DestroyNodes(NewPhiNodes); -      DestroyNodes(NewSelectNodes); +    if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) { +      ST.destroyNewNodes(CommonType);        return nullptr;      }      // Now we'd like to match New Phi nodes to existed ones.      unsigned PhiNotMatchedCount = 0; -    if (!MatchPhiSet(NewPhiNodes, ST, AddrSinkNewPhis, PhiNotMatchedCount)) { -      DestroyNodes(NewPhiNodes); -      DestroyNodes(NewSelectNodes); +    if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) { +      ST.destroyNewNodes(CommonType);        return nullptr;      }      auto *Result = ST.Get(Map.find(Original)->second);      if (Result) { -      NumMemoryInstsPhiCreated += NewPhiNodes.size() + PhiNotMatchedCount; -      NumMemoryInstsSelectCreated += NewSelectNodes.size(); +      NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount; +      NumMemoryInstsSelectCreated += ST.countNewSelectNodes();      }      return Result;    } -  /// \brief Destroy nodes from a set. -  template <typename T> void DestroyNodes(SmallPtrSetImpl<T *> &Instructions) { -    // For safe erasing, replace the Phi with dummy value first. -    auto Dummy = UndefValue::get(CommonType); -    for (auto I : Instructions) { -      I->replaceAllUsesWith(Dummy); -      I->eraseFromParent(); -    } -  } - -  /// \brief Try to match PHI node to Candidate. +  /// Try to match PHI node to Candidate.    
/// Matcher tracks the matched Phi nodes.    bool MatchPhiNode(PHINode *PHI, PHINode *Candidate, -                    DenseSet<PHIPair> &Matcher, -                    SmallPtrSetImpl<PHINode *> &PhiNodesToMatch) { +                    SmallSetVector<PHIPair, 8> &Matcher, +                    SmallSetVector<PHINode *, 32> &PhiNodesToMatch) {      SmallVector<PHIPair, 8> WorkList;      Matcher.insert({ PHI, Candidate });      WorkList.push_back({ PHI, Candidate }); @@ -2908,13 +2979,16 @@ private:      return true;    } -  /// \brief For the given set of PHI nodes try to find their equivalents. +  /// For the given set of PHI nodes (in the SimplificationTracker) try +  /// to find their equivalents.    /// Returns false if this matching fails and creation of new Phi is disabled. -  bool MatchPhiSet(SmallPtrSetImpl<PHINode *> &PhiNodesToMatch, -                   SimplificationTracker &ST, bool AllowNewPhiNodes, +  bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,                     unsigned &PhiNotMatchedCount) { -    DenseSet<PHIPair> Matched; +    // Use a SetVector for Matched to make sure we do replacements (ReplacePhi) +    // in a deterministic order below. +    SmallSetVector<PHIPair, 8> Matched;      SmallPtrSet<PHINode *, 8> WillNotMatch; +    SmallSetVector<PHINode *, 32> &PhiNodesToMatch = ST.newPhiNodes();      while (PhiNodesToMatch.size()) {        PHINode *PHI = *PhiNodesToMatch.begin(); @@ -2938,12 +3012,8 @@ private:        }        if (IsMatched) {          // Replace all matched values and erase them. -        for (auto MV : Matched) { -          MV.first->replaceAllUsesWith(MV.second); -          PhiNodesToMatch.erase(MV.first); -          ST.Put(MV.first, MV.second); -          MV.first->eraseFromParent(); -        } +        for (auto MV : Matched) +          ST.ReplacePhi(MV.first, MV.second);          Matched.clear();          continue;        } @@ -2953,11 +3023,11 @@ private:        // Just remove all seen values in matcher. They will not match anything.        PhiNotMatchedCount += WillNotMatch.size();        for (auto *P : WillNotMatch) -        PhiNodesToMatch.erase(P); +        PhiNodesToMatch.remove(P);      }      return true;    } -  /// \brief Fill the placeholder with values from predecessors and simplify it. +  /// Fill the placeholder with values from predecessors and simplify it.    void FillPlaceholders(FoldAddrToValueMapping &Map,                          SmallVectorImpl<ValueInBB> &TraverseOrder,                          SimplificationTracker &ST) { @@ -3011,8 +3081,7 @@ private:    /// Also reports and order in what basic blocks have been traversed.    
 void InsertPlaceholders(FoldAddrToValueMapping &Map,
                           SmallVectorImpl<ValueInBB> &TraverseOrder,
-                          SmallPtrSetImpl<PHINode *> &NewPhiNodes,
-                          SmallPtrSetImpl<SelectInst *> &NewSelectNodes) {
+                          SimplificationTracker &ST) {
     SmallVector<ValueInBB, 32> Worklist;
     assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) &&
            "Address must be a Phi or Select node");
@@ -3038,8 +3107,7 @@ private:
       Instruction *CurrentI = cast<Instruction>(CurrentValue);
       bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock;

-      unsigned PredCount =
-          std::distance(pred_begin(CurrentBlock), pred_end(CurrentBlock));
+      unsigned PredCount = pred_size(CurrentBlock);
       // if Current Value is not defined in this basic block we are interested
       // in values in predecessors.
       if (!IsDefinedInThisBB) {
@@ -3047,7 +3115,7 @@ private:
         PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
                                        &CurrentBlock->front());
         Map[Current] = PHI;
-        NewPhiNodes.insert(PHI);
+        ST.insertNewPhi(PHI);
         // Add all predecessors in work list.
         for (auto B : predecessors(CurrentBlock))
           Worklist.push_back({ CurrentValue, B });
@@ -3061,7 +3129,7 @@ private:
             SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy,
                                OrigSelect->getName(), OrigSelect, OrigSelect);
         Map[Current] = Select;
-        NewSelectNodes.insert(Select);
+        ST.insertNewSelect(Select);
         // We are interested in True and False value in this basic block.
         Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock });
         Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock });
@@ -3073,7 +3141,7 @@ private:
         PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
                                        &CurrentBlock->front());
         Map[Current] = PHI;
-        NewPhiNodes.insert(PHI);
+        ST.insertNewPhi(PHI);

         // Add all predecessors in work list.
         for (auto B : predecessors(CurrentBlock))
@@ -3167,7 +3235,7 @@ static bool MightBeFoldableInst(Instruction *I) {
     // Don't touch identity bitcasts.
     if (I->getType() == I->getOperand(0)->getType())
       return false;
-    return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
+    return I->getType()->isIntOrPtrTy();
   case Instruction::PtrToInt:
     // PtrToInt is always a noop, as we know that the int type is pointer sized.
     return true;
@@ -3187,7 +3255,7 @@ static bool MightBeFoldableInst(Instruction *I) {
   }
 }

-/// \brief Check whether or not \p Val is a legal instruction for \p TLI.
+/// Check whether or not \p Val is a legal instruction for \p TLI.
 /// \note \p Val is assumed to be the product of some type promotion.
 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed
 /// to be legal, as the non-promoted value would have had the same state.
@@ -3207,9 +3275,9 @@ static bool isPromotedInstructionLegal(const TargetLowering &TLI,

 namespace {

-/// \brief Hepler class to perform type promotion.
+/// Helper class to perform type promotion.
class TypePromotionHelper { -  /// \brief Utility function to check whether or not a sign or zero extension +  /// Utility function to check whether or not a sign or zero extension    /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by    /// either using the operands of \p Inst or promoting \p Inst.    /// The type of the extension is defined by \p IsSExt. @@ -3223,13 +3291,13 @@ class TypePromotionHelper {    static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,                              const InstrToOrigTy &PromotedInsts, bool IsSExt); -  /// \brief Utility function to determine if \p OpIdx should be promoted when +  /// Utility function to determine if \p OpIdx should be promoted when    /// promoting \p Inst.    static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {      return !(isa<SelectInst>(Inst) && OpIdx == 0);    } -  /// \brief Utility function to promote the operand of \p Ext when this +  /// Utility function to promote the operand of \p Ext when this    /// operand is a promotable trunc or sext or zext.    /// \p PromotedInsts maps the instructions to their type before promotion.    /// \p CreatedInstsCost[out] contains the cost of all instructions @@ -3244,7 +3312,7 @@ class TypePromotionHelper {        SmallVectorImpl<Instruction *> *Exts,        SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI); -  /// \brief Utility function to promote the operand of \p Ext when this +  /// Utility function to promote the operand of \p Ext when this    /// operand is promotable and is not a supported trunc or sext.    /// \p PromotedInsts maps the instructions to their type before promotion.    /// \p CreatedInstsCost[out] contains the cost of all the instructions @@ -3290,7 +3358,7 @@ public:                              SmallVectorImpl<Instruction *> *Truncs,                              const TargetLowering &TLI); -  /// \brief Given a sign/zero extend instruction \p Ext, return the approriate +  /// Given a sign/zero extend instruction \p Ext, return the appropriate    /// action to promote the operand of \p Ext instead of using Ext.    /// \return NULL if no promotable action is possible with the current    /// sign extension. @@ -3332,6 +3400,47 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,         (IsSExt && BinOp->hasNoSignedWrap())))      return true; +  // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst)) +  if ((Inst->getOpcode() == Instruction::And || +       Inst->getOpcode() == Instruction::Or)) +    return true; + +  // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst)) +  if (Inst->getOpcode() == Instruction::Xor) { +    const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)); +    // Make sure it is not a NOT. +    if (Cst && !Cst->getValue().isAllOnesValue()) +      return true; +  } + +  // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst)) +  // It may change a poisoned value into a regular value, like +  //     zext i32 (shrl i8 %val, 12)  -->  shrl i32 (zext i8 %val), 12 +  //          poisoned value                    regular value +  // It should be OK since undef covers valid value. 
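
As an aside, the zext/lshr commutation relied on above is easy to sanity-check with APInt for in-range shift amounts; a standalone, illustrative check, not part of the pass:

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // zext-then-shift equals shift-then-zext for logical right shifts.
    static bool zextCommutesWithLshr(uint64_t V, unsigned Sh) {
      APInt Val(8, V);                 // i8 %val
      APInt A = Val.zext(32).lshr(Sh); // zext to i32, then lshr
      APInt B = Val.lshr(Sh).zext(32); // lshr on i8, then zext
      return A == B;                   // holds for any V when Sh < 8
    }
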
+  if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
+    return true;
+
+  // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
+  // It may change a poisoned value into a regular value, like
+  //     zext i32 (shl i8 %val, 12)  -->  shl i32 (zext i8 %val), 12
+  //          poisoned value                    regular value
+  // It should be OK since undef covers valid value.
+  if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
+    const Instruction *ExtInst =
+        dyn_cast<const Instruction>(*Inst->user_begin());
+    if (ExtInst && ExtInst->hasOneUse()) {
+      const Instruction *AndInst =
+          dyn_cast<const Instruction>(*ExtInst->user_begin());
+      if (AndInst && AndInst->getOpcode() == Instruction::And) {
+        const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
+        if (Cst &&
+            Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
+          return true;
+      }
+    }
+  }
+
   // Check if we can do the following simplification.
   // ext(trunc(opnd)) --> ext(opnd)
   if (!isa<TruncInst>(Inst))
@@ -3496,19 +3605,19 @@ Value *TypePromotionHelper::promoteOperandForOther(
   // Step #3.
   Instruction *ExtForOpnd = Ext;

-  DEBUG(dbgs() << "Propagate Ext to operands\n");
+  LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
   for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
        ++OpIdx) {
-    DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
+    LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
     if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
         !shouldExtOperand(ExtOpnd, OpIdx)) {
-      DEBUG(dbgs() << "No need to propagate\n");
+      LLVM_DEBUG(dbgs() << "No need to propagate\n");
       continue;
     }
     // Check if we can statically extend the operand.
     Value *Opnd = ExtOpnd->getOperand(OpIdx);
     if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
-      DEBUG(dbgs() << "Statically extend\n");
+      LLVM_DEBUG(dbgs() << "Statically extend\n");
       unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
       APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
                             : Cst->getValue().zext(BitWidth);
@@ -3517,16 +3626,16 @@ Value *TypePromotionHelper::promoteOperandForOther(
     }
     // UndefValue are typed, so we have to statically sign extend them.
     if (isa<UndefValue>(Opnd)) {
-      DEBUG(dbgs() << "Statically extend\n");
+      LLVM_DEBUG(dbgs() << "Statically extend\n");
       TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
       continue;
     }

-    // Otherwise we have to explicity sign extend the operand.
+    // Otherwise we have to explicitly sign extend the operand.
     // Check if Ext was reused to extend an operand.
     if (!ExtForOpnd) {
       // If yes, create a new one.
-      DEBUG(dbgs() << "More operands to ext\n");
+      LLVM_DEBUG(dbgs() << "More operands to ext\n");
       Value *ValForExtOpnd = IsSExt ?
TPT.createSExt(Ext, Opnd, Ext->getType())          : TPT.createZExt(Ext, Opnd, Ext->getType());        if (!isa<Instruction>(ValForExtOpnd)) { @@ -3547,7 +3656,7 @@ Value *TypePromotionHelper::promoteOperandForOther(      ExtForOpnd = nullptr;    }    if (ExtForOpnd == Ext) { -    DEBUG(dbgs() << "Extension is useless now\n"); +    LLVM_DEBUG(dbgs() << "Extension is useless now\n");      TPT.eraseInstruction(Ext);    }    return ExtOpnd; @@ -3563,7 +3672,8 @@ Value *TypePromotionHelper::promoteOperandForOther(  /// \return True if the promotion is profitable, false otherwise.  bool AddressingModeMatcher::isPromotionProfitable(      unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { -  DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'); +  LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost +                    << '\n');    // The cost of the new extensions is greater than the cost of the    // old extension plus what we folded.    // This is not profitable. @@ -3613,8 +3723,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,    case Instruction::BitCast:      // BitCast is always a noop, and we can handle it as long as it is      // int->int or pointer->pointer (we don't want int<->fp or something). -    if ((AddrInst->getOperand(0)->getType()->isPointerTy() || -         AddrInst->getOperand(0)->getType()->isIntegerTy()) && +    if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&          // Don't touch identity bitcasts.  These were probably put here by LSR,          // and we don't want to mess around with them.  Assume it knows what it          // is doing. @@ -3714,6 +3823,30 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,          // Check to see if we can fold the base pointer in too.          if (matchAddr(AddrInst->getOperand(0), Depth+1))            return true; +      } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) && +                 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 && +                 ConstantOffset > 0) { +        // Record GEPs with non-zero offsets as candidates for splitting in the +        // event that the offset cannot fit into the r+i addressing mode. +        // Simple and common case that only one GEP is used in calculating the +        // address for the memory access. +        Value *Base = AddrInst->getOperand(0); +        auto *BaseI = dyn_cast<Instruction>(Base); +        auto *GEP = cast<GetElementPtrInst>(AddrInst); +        if (isa<Argument>(Base) || isa<GlobalValue>(Base) || +            (BaseI && !isa<CastInst>(BaseI) && +             !isa<GetElementPtrInst>(BaseI))) { +          // If the base is an instruction, make sure the GEP is not in the same +          // basic block as the base. If the base is an argument or global +          // value, make sure the GEP is not in the entry block.  Otherwise, +          // instruction selection can undo the split.  Also make sure the +          // parent block allows inserting non-PHI instructions before the +          // terminator. +          BasicBlock *Parent = +              BaseI ? 
BaseI->getParent() : &GEP->getFunction()->getEntryBlock(); +          if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad()) +            LargeOffsetGEP = std::make_pair(GEP, ConstantOffset); +        }        }        AddrMode.BaseOffs -= ConstantOffset;        return false; @@ -3810,7 +3943,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,                                 PromotedOperand)) {        AddrMode = BackupAddrMode;        AddrModeInsts.resize(OldSize); -      DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); +      LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");        TPT.rollback(LastKnownGood);        return false;      } @@ -4124,12 +4257,13 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,      // will tell us if the addressing mode for the memory operation will      // *actually* cover the shared instruction.      ExtAddrMode Result; +    std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr, +                                                                      0);      TypePromotionTransaction::ConstRestorationPt LastKnownGood =          TPT.getRestorationPoint(); -    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, -                                  AddressAccessTy, AS, -                                  MemoryInst, Result, InsertedInsts, -                                  PromotedInsts, TPT); +    AddressingModeMatcher Matcher( +        MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result, +        InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);      Matcher.IgnoreProfitability = true;      bool Success = Matcher.matchAddr(Address, 0);      (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -4231,11 +4365,24 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,      // the result may differ depending on what other uses our candidate      // addressing instructions might have.      AddrModeInsts.clear(); +    std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr, +                                                                      0);      ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(          V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, -        InsertedInsts, PromotedInsts, TPT); -    NewAddrMode.OriginalValue = V; +        InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); + +    GetElementPtrInst *GEP = LargeOffsetGEP.first; +    if (GEP && GEP->getParent() != MemoryInst->getParent() && +        !NewGEPBases.count(GEP)) { +      // If splitting the underlying data structure can reduce the offset of a +      // GEP, collect the GEP.  Skip the GEPs that are the new bases of +      // previously split data structures. 
+      LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP); +      if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end()) +        LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size(); +    } +    NewAddrMode.OriginalValue = V;      if (!AddrModes.addNewAddrMode(NewAddrMode))        break;    } @@ -4259,7 +4406,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,    if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {          return IsNonLocalValue(V, MemoryInst->getParent());                    })) { -    DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n"); +    LLVM_DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode +                      << "\n");      return false;    } @@ -4278,17 +4426,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,    Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;    if (SunkAddr) { -    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " -                 << *MemoryInst << "\n"); +    LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode +                      << " for " << *MemoryInst << "\n");      if (SunkAddr->getType() != Addr->getType())        SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());    } else if (AddrSinkUsingGEPs || -             (!AddrSinkUsingGEPs.getNumOccurrences() && TM && -              SubtargetInfo->useAA())) { +             (!AddrSinkUsingGEPs.getNumOccurrences() && TM && TTI->useAA())) {      // By default, we use the GEP-based method when AA is used later. This      // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. -    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " -                 << *MemoryInst << "\n"); +    LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode +                      << " for " << *MemoryInst << "\n");      Type *IntPtrTy = DL->getIntPtrType(Addr->getType());      Value *ResultPtr = nullptr, *ResultIndex = nullptr; @@ -4427,8 +4574,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,           DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))        return false; -    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " -                 << *MemoryInst << "\n"); +    LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode +                      << " for " << *MemoryInst << "\n");      Type *IntPtrTy = DL->getIntPtrType(Addr->getType());      Value *Result = nullptr; @@ -4554,7 +4701,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {    return MadeChange;  } -/// \brief Check if all the uses of \p Val are equivalent (or free) zero or +/// Check if all the uses of \p Val are equivalent (or free) zero or  /// sign extensions.  static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {    assert(!Val->use_empty() && "Input must have at least one use"); @@ -4602,7 +4749,7 @@ static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {    return true;  } -/// \brief Try to speculatively promote extensions in \p Exts and continue +/// Try to speculatively promote extensions in \p Exts and continue  /// promoting through newly promoted operands recursively as far as doing so is  /// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.  
 /// When some promotion happened, \p TPT contains the proper state to revert
@@ -4728,7 +4875,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
         }
         if (!DT.dominates(Pt, Inst))
           // Give up if we need to merge in a common dominator as the
-          // expermients show it is not profitable.
+          // experiments show it is not profitable.
           continue;
         Inst->replaceAllUsesWith(Pt);
         RemovedInsts.insert(Inst);
@@ -4744,6 +4891,154 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
   return Changed;
 }

+// Splitting large data structures so that the GEPs accessing them can have
+// smaller offsets so that they can be sunk to the same blocks as their users.
+// For example, a large struct starting from %base is split into two parts
+// where the second part starts from %new_base.
+//
+// Before:
+// BB0:
+//   %base     =
+//
+// BB1:
+//   %gep0     = gep %base, off0
+//   %gep1     = gep %base, off1
+//   %gep2     = gep %base, off2
+//
+// BB2:
+//   %load1    = load %gep0
+//   %load2    = load %gep1
+//   %load3    = load %gep2
+//
+// After:
+// BB0:
+//   %base     =
+//   %new_base = gep %base, off0
+//
+// BB1:
+//   %new_gep0 = %new_base
+//   %new_gep1 = gep %new_base, off1 - off0
+//   %new_gep2 = gep %new_base, off2 - off0
+//
+// BB2:
+//   %load1    = load i32, i32* %new_gep0
+//   %load2    = load i32, i32* %new_gep1
+//   %load3    = load i32, i32* %new_gep2
+//
+// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
+// their offsets are small enough to fit into the addressing mode.
+bool CodeGenPrepare::splitLargeGEPOffsets() {
+  bool Changed = false;
+  for (auto &Entry : LargeOffsetGEPMap) {
+    Value *OldBase = Entry.first;
+    SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
+        &LargeOffsetGEPs = Entry.second;
+    auto compareGEPOffset =
+        [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
+            const std::pair<GetElementPtrInst *, int64_t> &RHS) {
+          if (LHS.first == RHS.first)
+            return false;
+          if (LHS.second != RHS.second)
+            return LHS.second < RHS.second;
+          return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
+        };
+    // Sorting all the GEPs of the same data structures based on the offsets.
+    llvm::sort(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end(),
+               compareGEPOffset);
+    LargeOffsetGEPs.erase(
+        std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
+        LargeOffsetGEPs.end());
+    // Skip if all the GEPs have the same offsets.
+    if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
+      continue;
+    GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
+    int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
+    Value *NewBaseGEP = nullptr;
+
+    auto LargeOffsetGEP = LargeOffsetGEPs.begin();
+    while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
+      GetElementPtrInst *GEP = LargeOffsetGEP->first;
+      int64_t Offset = LargeOffsetGEP->second;
+      if (Offset != BaseOffset) {
+        TargetLowering::AddrMode AddrMode;
+        AddrMode.BaseOffs = Offset - BaseOffset;
+        // The result type of the GEP might not be the type of the memory
+        // access.
+        if (!TLI->isLegalAddressingMode(*DL, AddrMode,
+                                        GEP->getResultElementType(),
+                                        GEP->getAddressSpace())) {
+          // We need to create a new base if the offset to the current base is
+          // too large to fit into the addressing mode. So, a very large struct
+          // may be split into several parts.
+          BaseGEP = GEP;
+          BaseOffset = Offset;
+          NewBaseGEP = nullptr;
+        }
+      }
+
+      // Generate a new GEP to replace the current one.
+      IRBuilder<> Builder(GEP);
+      Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+      Type *I8PtrTy =
+          Builder.getInt8PtrTy(GEP->getType()->getPointerAddressSpace());
+      Type *I8Ty = Builder.getInt8Ty();
+
+      if (!NewBaseGEP) {
+        // Create a new base if we don't have one yet.  Find the insertion
+        // point for the new base first.
+        BasicBlock::iterator NewBaseInsertPt;
+        BasicBlock *NewBaseInsertBB;
+        if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
+          // If the base of the struct is an instruction, the new base will be
+          // inserted close to it.
+          NewBaseInsertBB = BaseI->getParent();
+          if (isa<PHINode>(BaseI))
+            NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+          else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
+            NewBaseInsertBB =
+                SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
+            NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+          } else
+            NewBaseInsertPt = std::next(BaseI->getIterator());
+        } else {
+          // If the current base is an argument or global value, the new base
+          // will be inserted into the entry block.
+          NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
+          NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
+        }
+        IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
+        // Create a new base.
+        Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
+        NewBaseGEP = OldBase;
+        if (NewBaseGEP->getType() != I8PtrTy)
+          NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
+        NewBaseGEP =
+            NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
+        NewGEPBases.insert(NewBaseGEP);
+      }
+
+      Value *NewGEP = NewBaseGEP;
+      if (Offset == BaseOffset) {
+        if (GEP->getType() != I8PtrTy)
+          NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
+      } else {
+        // Calculate the new offset for the new GEP.
+        Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
+        NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
+
+        if (GEP->getType() != I8PtrTy)
+          NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
+      }
+      GEP->replaceAllUsesWith(NewGEP);
+      LargeOffsetGEPID.erase(GEP);
+      LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
+      GEP->eraseFromParent();
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
 /// Return true, if an ext(load) can be formed from an extension in
 /// \p MovedExts.
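
Stripped of the IR plumbing, the loop above is simple bucket arithmetic: offsets are sorted, each GEP becomes an i8 GEP at (Offset - BaseOffset) off the current base, and a fresh base is started whenever that delta is no longer a legal addressing-mode immediate. A standalone sketch of just the bucketing decision, where isLegalDelta stands in for TLI->isLegalAddressingMode:

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Precondition: Offsets is non-empty and sorted ascending (as after the
    // llvm::sort above). Returns a (base, delta) pair per GEP.
    static std::vector<std::pair<int64_t, int64_t>>
    splitOffsets(const std::vector<int64_t> &Offsets,
                 bool (*isLegalDelta)(int64_t)) {
      std::vector<std::pair<int64_t, int64_t>> Out;
      int64_t Base = Offsets.front();
      for (int64_t Off : Offsets) {
        if (Off != Base && !isLegalDelta(Off - Base))
          Base = Off; // delta too large: split again, starting a new base here
        Out.push_back({Base, Off - Base});
      }
      return Out;
    }
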
bool CodeGenPrepare::canFormExtLd( @@ -5053,8 +5348,7 @@ bool CodeGenPrepare::optimizeExtUses(Instruction *I) {  //   x = phi x1', x2'  //   y = and x, 0xff  bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { -  if (!Load->isSimple() || -      !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy())) +  if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())      return false;    // Skip loads we've already transformed. @@ -5519,7 +5813,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {  namespace { -/// \brief Helper class to promote a scalar operation to a vector one. +/// Helper class to promote a scalar operation to a vector one.  /// This class is used to move downward extractelement transition.  /// E.g.,  /// a = vector_op <2 x i32> @@ -5556,7 +5850,7 @@ class VectorPromoteHelper {    /// Instruction that will be combined with the transition.    Instruction *CombineInst = nullptr; -  /// \brief The instruction that represents the current end of the transition. +  /// The instruction that represents the current end of the transition.    /// Since we are faking the promotion until we reach the end of the chain    /// of computation, we need a way to get the current end of the transition.    Instruction *getEndOfTransition() const { @@ -5565,7 +5859,7 @@ class VectorPromoteHelper {      return InstsToBePromoted.back();    } -  /// \brief Return the index of the original value in the transition. +  /// Return the index of the original value in the transition.    /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,    /// c, is at index 0.    unsigned getTransitionOriginalValueIdx() const { @@ -5574,7 +5868,7 @@ class VectorPromoteHelper {      return 0;    } -  /// \brief Return the index of the index in the transition. +  /// Return the index of the index in the transition.    /// E.g., for "extractelement <2 x i32> c, i32 0" the index    /// is at index 1.    unsigned getTransitionIdx() const { @@ -5583,7 +5877,7 @@ class VectorPromoteHelper {      return 1;    } -  /// \brief Get the type of the transition. +  /// Get the type of the transition.    /// This is the type of the original value.    /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the    /// transition is <2 x i32>. @@ -5591,7 +5885,7 @@ class VectorPromoteHelper {      return Transition->getOperand(getTransitionOriginalValueIdx())->getType();    } -  /// \brief Promote \p ToBePromoted by moving \p Def downward through. +  /// Promote \p ToBePromoted by moving \p Def downward through.    /// I.e., we have the following sequence:    /// Def = Transition <ty1> a to <ty2>    /// b = ToBePromoted <ty2> Def, ... @@ -5600,7 +5894,7 @@ class VectorPromoteHelper {    /// Def = Transition <ty1> ToBePromoted to <ty2>    void promoteImpl(Instruction *ToBePromoted); -  /// \brief Check whether or not it is profitable to promote all the +  /// Check whether or not it is profitable to promote all the    /// instructions enqueued to be promoted.    
 bool isProfitableToPromote() {
     Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
@@ -5646,12 +5940,13 @@ class VectorPromoteHelper {
       VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
                                                Arg0OVK, Arg1OVK);
     }
-    DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
-                 << ScalarCost << "\nVector: " << VectorCost << '\n');
+    LLVM_DEBUG(
+        dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
+               << ScalarCost << "\nVector: " << VectorCost << '\n');
     return ScalarCost > VectorCost;
   }

-  /// \brief Generate a constant vector with \p Val with the same
+  /// Generate a constant vector with \p Val with the same
   /// number of elements as the transition.
   /// \p UseSplat defines whether or not \p Val should be replicated
   /// across the whole vector.
@@ -5686,7 +5981,7 @@ class VectorPromoteHelper {
     return ConstantVector::get(ConstVec);
   }

-  /// \brief Check if promoting to a vector type an operand at \p OperandIdx
+  /// Check if promoting to a vector type an operand at \p OperandIdx
   /// in \p Use can trigger undefined behavior.
   static bool canCauseUndefinedBehavior(const Instruction *Use,
                                         unsigned OperandIdx) {
@@ -5718,13 +6013,13 @@ public:
     assert(Transition && "Do not know how to promote null");
   }

-  /// \brief Check if we can promote \p ToBePromoted to \p Type.
+  /// Check if we can promote \p ToBePromoted to \p Type.
   bool canPromote(const Instruction *ToBePromoted) const {
     // We could support CastInst too.
     return isa<BinaryOperator>(ToBePromoted);
   }

-  /// \brief Check if it is profitable to promote \p ToBePromoted
+  /// Check if it is profitable to promote \p ToBePromoted
   /// by moving downward the transition through.
   bool shouldPromote(const Instruction *ToBePromoted) const {
     // Promote only if all the operands can be statically expanded.
@@ -5752,23 +6047,23 @@ public:
                ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
   }

-  /// \brief Check whether or not \p Use can be combined
+  /// Check whether or not \p Use can be combined
   /// with the transition.
   /// I.e., is it possible to do Use(Transition) => AnotherUse?
   bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }

-  /// \brief Record \p ToBePromoted as part of the chain to be promoted.
+  /// Record \p ToBePromoted as part of the chain to be promoted.
   void enqueueForPromotion(Instruction *ToBePromoted) {
     InstsToBePromoted.push_back(ToBePromoted);
   }

-  /// \brief Set the instruction that will be combined with the transition.
+  /// Set the instruction that will be combined with the transition.
   void recordCombineInstruction(Instruction *ToBeCombined) {
     assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
     CombineInst = ToBeCombined;
   }

-  /// \brief Promote all the instructions enqueued for promotion if it is
+  /// Promote all the instructions enqueued for promotion if it
   /// is profitable.
   /// \return True if the promotion happened, false otherwise.
   bool promote() {
@@ -5852,35 +6147,36 @@ bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
   //   => we would need to check that we are moving it at a cheaper place and
   //      we do not do that for now.
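
The profitability test in isProfitableToPromote above is a straight cost race: it sums the scalar cost of every instruction in the chain against the vector cost after promotion, and promotes only on strict improvement. A minimal standalone analogue, with illustrative costs standing in for the TTI queries:

    #include <vector>

    struct OpCost {
      unsigned Scalar; // cost of keeping the op scalar
      unsigned Vector; // cost once moved past the extractelement
    };

    static bool worthPromoting(const std::vector<OpCost> &Chain) {
      unsigned ScalarCost = 0, VectorCost = 0;
      for (const OpCost &C : Chain) {
        ScalarCost += C.Scalar;
        VectorCost += C.Vector;
      }
      // Mirrors 'return ScalarCost > VectorCost;' above: ties stay scalar.
      return ScalarCost > VectorCost;
    }
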
BasicBlock *Parent = Inst->getParent(); -  DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); +  LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');    VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);    // If the transition has more than one use, assume this is not going to be    // beneficial.    while (Inst->hasOneUse()) {      Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin()); -    DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n'); +    LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');      if (ToBePromoted->getParent() != Parent) { -      DEBUG(dbgs() << "Instruction to promote is in a different block (" -                   << ToBePromoted->getParent()->getName() -                   << ") than the transition (" << Parent->getName() << ").\n"); +      LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block (" +                        << ToBePromoted->getParent()->getName() +                        << ") than the transition (" << Parent->getName() +                        << ").\n");        return false;      }      if (VPH.canCombine(ToBePromoted)) { -      DEBUG(dbgs() << "Assume " << *Inst << '\n' -                   << "will be combined with: " << *ToBePromoted << '\n'); +      LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n' +                        << "will be combined with: " << *ToBePromoted << '\n');        VPH.recordCombineInstruction(ToBePromoted);        bool Changed = VPH.promote();        NumStoreExtractExposed += Changed;        return Changed;      } -    DEBUG(dbgs() << "Try promoting.\n"); +    LLVM_DEBUG(dbgs() << "Try promoting.\n");      if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))        return false; -    DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n"); +    LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");      VPH.enqueueForPromotion(ToBePromoted);      Inst = ToBePromoted; @@ -5890,7 +6186,7 @@ bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {  /// For the instruction sequence of store below, F and I values  /// are bundled together as an i64 value before being stored into memory. -/// Sometimes it is more efficent to generate separate stores for F and I, +/// Sometimes it is more efficient to generate separate stores for F and I,  /// which can remove the bitwise instructions or sink them to colder places.  
///  ///   (store (or (zext (bitcast F to i32) to i64), @@ -5978,12 +6274,13 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,    if (HBC && HBC->getParent() != SI.getParent())      HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType()); +  bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();    auto CreateSplitStore = [&](Value *V, bool Upper) {      V = Builder.CreateZExtOrBitCast(V, SplitStoreType);      Value *Addr = Builder.CreateBitCast(          SI.getOperand(1),          SplitStoreType->getPointerTo(SI.getPointerAddressSpace())); -    if (Upper) +    if ((IsLE && Upper) || (!IsLE && !Upper))        Addr = Builder.CreateGEP(            SplitStoreType, Addr,            ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1)); @@ -6270,6 +6567,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {        /// The GEP operand must be a pointer, so must its result -> BitCast        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),                                          GEPI->getName(), GEPI); +      NC->setDebugLoc(GEPI->getDebugLoc());        GEPI->replaceAllUsesWith(NC);        GEPI->eraseFromParent();        ++NumGEPsElim; @@ -6374,7 +6672,8 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {          // after it.          if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())            continue; -        DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); +        LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" +                          << *DVI << ' ' << *VI);          DVI->removeFromParent();          if (isa<PHINode>(VI))            DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); @@ -6388,7 +6687,7 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {    return MadeChange;  } -/// \brief Scale down both weights to fit into uint32_t. +/// Scale down both weights to fit into uint32_t.  static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {    uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;    uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1; @@ -6396,7 +6695,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {    NewFalse = NewFalse / Scale;  } -/// \brief Some targets prefer to split a conditional branch like: +/// Some targets prefer to split a conditional branch like:  /// \code  ///   %0 = icmp ne i32 %a, 0  ///   %1 = icmp ne i32 %b, 0 @@ -6453,7 +6752,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {          !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp()))   )        continue; -    DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); +    LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());      // Create a new BB.      auto TmpBB = @@ -6465,8 +6764,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {      Br1->setCondition(Cond1);      LogicOp->eraseFromParent(); -    // Depending on the conditon we have to either replace the true or the false -    // successor of the original branch instruction. +    // Depending on the condition we have to either replace the true or the +    // false successor of the original branch instruction.      if (Opc == Instruction::And)        Br1->setSuccessor(0, TmpBB);      else @@ -6519,8 +6818,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {        // We have flexibility in setting Prob for BB1 and Prob for NewBB.        
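
The weight bookkeeping derived in the lines that follow can be sanity-checked numerically. A minimal check of the And-case identity, under assumed original weights A = 6 and B = 2:

#include <cassert>
#include <cmath>

int main() {
  const double A = 6, B = 2;
  const double TrueOrig = A / (A + B);                   // 0.75
  // Chosen weights: BB1 = {A, A + 2B}, TmpBB = {A, 2B}.
  const double TrueBB1 = A / (2 * A + 2 * B);            // 6/16 = 0.375
  const double FalseBB1 = (A + 2 * B) / (2 * A + 2 * B); // 10/16 = 0.625
  const double TrueTmpBB = A / (A + 2 * B);              // 6/10 = 0.6
  // TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB) == TrueProb(original BB)
  assert(std::fabs(TrueBB1 + FalseBB1 * TrueTmpBB - TrueOrig) < 1e-12);
  return 0;
}
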
// The requirement is that        //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) -      //     = TrueProb for orignal BB. -      // Assuming the orignal weights are A and B, one choice is to set BB1's +      //     = TrueProb for original BB. +      // Assuming the original weights are A and B, one choice is to set BB1's        // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice        // assumes that        //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. @@ -6554,8 +6853,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {        // We have flexibility in setting Prob for BB1 and Prob for TmpBB.        // The requirement is that        //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) -      //     = FalseProb for orignal BB. -      // Assuming the orignal weights are A and B, one choice is to set BB1's +      //     = FalseProb for original BB. +      // Assuming the original weights are A and B, one choice is to set BB1's        // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice        // assumes that        //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. @@ -6581,8 +6880,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {      MadeChange = true; -    DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); -          TmpBB->dump()); +    LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); +               TmpBB->dump());    }    return MadeChange;  } diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 98e22b24d37a..840e5ede6444 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -113,7 +113,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,    // FIXME: It may be possible to remove the isKill() restriction once PR18663    // has been properly fixed. There can be value in processing kills as seen in    // the AggressiveAntiDepBreaker class. -  if (MI.isDebugValue() || MI.isKill()) +  if (MI.isDebugInstr() || MI.isKill())      return;    assert(Count < InsertPosIndex && "Instruction index out of expected range!"); @@ -170,11 +170,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {    // FIXME: The issue with predicated instruction is more complex. We are being    // conservative here because the kill markers cannot be trusted after    // if-conversion: -  // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] +  // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14]    // ... -  // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395] -  // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12] -  // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) +  // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395] +  // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12] +  // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8)    //    // The first R6 kill is not really a kill since it's killed by a predicated    // instruction which may not be executed. 
The second R6 def may or may not @@ -461,14 +461,14 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,  #ifndef NDEBUG    { -    DEBUG(dbgs() << "Critical path has total latency " -          << (Max->getDepth() + Max->Latency) << "\n"); -    DEBUG(dbgs() << "Available regs:"); +    LLVM_DEBUG(dbgs() << "Critical path has total latency " +                      << (Max->getDepth() + Max->Latency) << "\n"); +    LLVM_DEBUG(dbgs() << "Available regs:");      for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {        if (KillIndices[Reg] == ~0u) -        DEBUG(dbgs() << " " << printReg(Reg, TRI)); +        LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));      } -    DEBUG(dbgs() << '\n'); +    LLVM_DEBUG(dbgs() << '\n');    }  #endif @@ -534,7 +534,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,      // FIXME: It may be possible to remove the isKill() restriction once PR18663      // has been properly fixed. There can be value in processing kills as seen      // in the AggressiveAntiDepBreaker class. -    if (MI.isDebugValue() || MI.isKill()) +    if (MI.isDebugInstr() || MI.isKill())        continue;      // Check if this instruction has a dependence on the critical path that @@ -645,10 +645,10 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,                                                       AntiDepReg,                                                       LastNewReg[AntiDepReg],                                                       RC, ForbidRegs)) { -        DEBUG(dbgs() << "Breaking anti-dependence edge on " -                     << printReg(AntiDepReg, TRI) << " with " -                     << RegRefs.count(AntiDepReg) << " references" -                     << " using " << printReg(NewReg, TRI) << "!\n"); +        LLVM_DEBUG(dbgs() << "Breaking anti-dependence edge on " +                          << printReg(AntiDepReg, TRI) << " with " +                          << RegRefs.count(AntiDepReg) << " references" +                          << " using " << printReg(NewReg, TRI) << "!\n");          // Update the references to the old register to refer to the new          // register. diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp index 848db444270d..cd302e78cc3e 100644 --- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -222,7 +222,7 @@ VLIWPacketizerList::~VLIWPacketizerList() {  // End the current packet, bundle packet instructions and reset DFA state.  void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,                                     MachineBasicBlock::iterator MI) { -  DEBUG({ +  LLVM_DEBUG({      if (!CurrentPacketMIs.empty()) {        dbgs() << "Finalizing packet:\n";        for (MachineInstr *MI : CurrentPacketMIs) @@ -235,7 +235,7 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,    }    CurrentPacketMIs.clear();    ResourceTracker->clearResources(); -  DEBUG(dbgs() << "End packet\n"); +  LLVM_DEBUG(dbgs() << "End packet\n");  }  // Bundle machine instructions into packets. 
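
The PacketizeMIs() hunks below read more easily once reduced to a skeleton: greedily add instructions while the DFA still has resources and no unprunable dependence exists, otherwise close the packet. A minimal sketch with stand-in types (Instr and ResourceTracker here are not the LLVM classes, and dependence pruning is omitted):

#include <vector>

struct Instr { int Id; };

struct ResourceTracker {
  unsigned Slots = 0, MaxSlots = 4; // e.g. a 4-slot VLIW bundle
  bool canReserve() const { return Slots < MaxSlots; }
  void reserve() { ++Slots; }
  void clear() { Slots = 0; }
};

// Greedy bundling over one packetization range.
std::vector<std::vector<Instr>> packetize(const std::vector<Instr> &Block,
                                          ResourceTracker &RT) {
  std::vector<std::vector<Instr>> Packets(1);
  for (const Instr &I : Block) {
    if (!RT.canReserve()) { // out of machine resources: end this packet
      RT.clear();
      Packets.emplace_back();
    }
    RT.reserve();
    Packets.back().push_back(I);
  }
  return Packets;
}
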
@@ -248,7 +248,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,                               std::distance(BeginItr, EndItr));    VLIWScheduler->schedule(); -  DEBUG({ +  LLVM_DEBUG({      dbgs() << "Scheduling DAG of the packetize region\n";      for (SUnit &SU : VLIWScheduler->SUnits)        SU.dumpAll(VLIWScheduler); @@ -287,10 +287,10 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,      assert(SUI && "Missing SUnit Info!");      // Ask DFA if machine resource is available for MI. -    DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI); +    LLVM_DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI);      bool ResourceAvail = ResourceTracker->canReserveResources(MI); -    DEBUG({ +    LLVM_DEBUG({        if (ResourceAvail)          dbgs() << "  Resources are available for adding MI to packet\n";        else @@ -302,31 +302,33 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,          SUnit *SUJ = MIToSUnit[MJ];          assert(SUJ && "Missing SUnit Info!"); -        DEBUG(dbgs() << "  Checking against MJ " << *MJ); +        LLVM_DEBUG(dbgs() << "  Checking against MJ " << *MJ);          // Is it legal to packetize SUI and SUJ together.          if (!isLegalToPacketizeTogether(SUI, SUJ)) { -          DEBUG(dbgs() << "  Not legal to add MI, try to prune\n"); +          LLVM_DEBUG(dbgs() << "  Not legal to add MI, try to prune\n");            // Allow packetization if dependency can be pruned.            if (!isLegalToPruneDependencies(SUI, SUJ)) {              // End the packet if dependency cannot be pruned. -            DEBUG(dbgs() << "  Could not prune dependencies for adding MI\n"); +            LLVM_DEBUG(dbgs() +                       << "  Could not prune dependencies for adding MI\n");              endPacket(MBB, MI);              break;            } -          DEBUG(dbgs() << "  Pruned dependence for adding MI\n"); +          LLVM_DEBUG(dbgs() << "  Pruned dependence for adding MI\n");          }        }      } else { -      DEBUG(if (ResourceAvail) -        dbgs() << "Resources are available, but instruction should not be " -                  "added to packet\n  " << MI); +      LLVM_DEBUG(if (ResourceAvail) dbgs() +                 << "Resources are available, but instruction should not be " +                    "added to packet\n  " +                 << MI);        // End the packet if resource is not available, or if the instruction        // should not be added to the current packet.        endPacket(MBB, MI);      }      // Add MI to the current packet. -    DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n'); +    LLVM_DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n');      BeginItr = addToPacket(MI);    } // For all instructions in the packetization range. diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index e6a54bb300f2..ff44c5660bad 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -125,7 +125,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {        // If the instruction is dead, delete it!        if (isDead(MI)) { -        DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); +        LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);          // It is possible that some DBG_VALUE instructions refer to this          // instruction.  
They get marked as undef and will be deleted          // in the live debug variable analysis. diff --git a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp index 7d7eb57352a2..c83db476a4de 100644 --- a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -439,7 +439,7 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {            const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);            CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO);            if (CrossCopy) -            DEBUG(dbgs() << "Copy across incompatible classes: " << UseMI); +            LLVM_DEBUG(dbgs() << "Copy across incompatible classes: " << UseMI);          }          if (!CrossCopy) @@ -520,17 +520,15 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {        transferDefinedLanesStep(MO, Info.DefinedLanes);    } -  DEBUG( -    dbgs() << "Defined/Used lanes:\n"; -    for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { -      unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); -      const VRegInfo &Info = VRegInfos[RegIdx]; -      dbgs() << printReg(Reg, nullptr) -             << " Used: " << PrintLaneMask(Info.UsedLanes) -             << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n'; -    } -    dbgs() << "\n"; -  ); +  LLVM_DEBUG(dbgs() << "Defined/Used lanes:\n"; for (unsigned RegIdx = 0; +                                                     RegIdx < NumVirtRegs; +                                                     ++RegIdx) { +    unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); +    const VRegInfo &Info = VRegInfos[RegIdx]; +    dbgs() << printReg(Reg, nullptr) +           << " Used: " << PrintLaneMask(Info.UsedLanes) +           << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n'; +  } dbgs() << "\n";);    bool Again = false;    // Mark operands as dead/unused. @@ -545,18 +543,19 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {          unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);          const VRegInfo &RegInfo = VRegInfos[RegIdx];          if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) { -          DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI); +          LLVM_DEBUG(dbgs() +                     << "Marking operand '" << MO << "' as dead in " << MI);            MO.setIsDead();          }          if (MO.readsReg()) {            bool CrossCopy = false;            if (isUndefRegAtInput(MO, RegInfo)) { -            DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " -                  << MI); +            LLVM_DEBUG(dbgs() +                       << "Marking operand '" << MO << "' as undef in " << MI);              MO.setIsUndef();            } else if (isUndefInput(MO, &CrossCopy)) { -            DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " -                  << MI); +            LLVM_DEBUG(dbgs() +                       << "Marking operand '" << MO << "' as undef in " << MI);              MO.setIsUndef();              if (CrossCopy)                Again = true; @@ -577,7 +576,7 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {    // so we save the compile time.    
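
The lane analysis above maintains two masks per virtual register; a stripped-down model of the two queries it answers, with a plain integer bitset instead of llvm::LaneBitmask:

#include <cstdint>

using LaneMask = uint32_t; // one bit per subregister lane

struct VRegLanes {
  LaneMask UsedLanes = 0;    // lanes some instruction reads
  LaneMask DefinedLanes = 0; // lanes some instruction writes
};

// A def is dead when none of its lanes are ever read.
inline bool defIsDead(const VRegLanes &V) { return V.UsedLanes == 0; }

// A use reads undef when it touches a lane that was never written.
inline bool useReadsUndef(const VRegLanes &V, LaneMask UseLanes) {
  return (UseLanes & ~V.DefinedLanes) != 0;
}
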
MRI = &MF.getRegInfo();    if (!MRI->subRegLivenessEnabled()) { -    DEBUG(dbgs() << "Skipping Detect dead lanes pass\n"); +    LLVM_DEBUG(dbgs() << "Skipping Detect dead lanes pass\n");      return false;    } diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index 39d80c0bf9bd..4586649d17f0 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -18,6 +18,7 @@  #include "llvm/Analysis/CFG.h"  #include "llvm/Analysis/EHPersonalities.h"  #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/CodeGen/RuntimeLibcalls.h"  #include "llvm/CodeGen/TargetLowering.h"  #include "llvm/CodeGen/TargetPassConfig.h" @@ -33,7 +34,6 @@  #include "llvm/Pass.h"  #include "llvm/Support/Casting.h"  #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Utils/Local.h"  #include <cstddef>  using namespace llvm; @@ -195,9 +195,9 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {    if (Resumes.empty())      return false; -  // Check the personality, don't do anything if it's funclet-based. +  // Check the personality, don't do anything if it's scope-based.    EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn()); -  if (isFuncletEHPersonality(Pers)) +  if (isScopedEHPersonality(Pers))      return false;    LLVMContext &Ctx = Fn.getContext(); diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp index 6294ff450113..098afd885f2f 100644 --- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -185,7 +185,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {    // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to    // get right.    if (!MBB->livein_empty()) { -    DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); +    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");      return false;    } @@ -195,18 +195,18 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {    // terminators never have side effects or define any used register values.    for (MachineBasicBlock::iterator I = MBB->begin(),         E = MBB->getFirstTerminator(); I != E; ++I) { -    if (I->isDebugValue()) +    if (I->isDebugInstr())        continue;      if (++InstrCount > BlockInstrLimit && !Stress) { -      DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " -                   << BlockInstrLimit << " instructions.\n"); +      LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " +                        << BlockInstrLimit << " instructions.\n");        return false;      }      // There shouldn't normally be any phis in a single-predecessor block.      if (I->isPHI()) { -      DEBUG(dbgs() << "Can't hoist: " << *I); +      LLVM_DEBUG(dbgs() << "Can't hoist: " << *I);        return false;      } @@ -214,21 +214,21 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {      // speculate GOT or constant pool loads that are guaranteed not to trap,      // but we don't support that for now.      if (I->mayLoad()) { -      DEBUG(dbgs() << "Won't speculate load: " << *I); +      LLVM_DEBUG(dbgs() << "Won't speculate load: " << *I);        return false;      }      // We never speculate stores, so an AA pointer isn't necessary.      
bool DontMoveAcrossStore = true;      if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) { -      DEBUG(dbgs() << "Can't speculate: " << *I); +      LLVM_DEBUG(dbgs() << "Can't speculate: " << *I);        return false;      }      // Check for any dependencies on Head instructions.      for (const MachineOperand &MO : I->operands()) {        if (MO.isRegMask()) { -        DEBUG(dbgs() << "Won't speculate regmask: " << *I); +        LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I);          return false;        }        if (!MO.isReg()) @@ -246,9 +246,10 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {        if (!DefMI || DefMI->getParent() != Head)          continue;        if (InsertAfter.insert(DefMI).second) -        DEBUG(dbgs() << printMBBReference(*MBB) << " depends on " << *DefMI); +        LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " depends on " +                          << *DefMI);        if (DefMI->isTerminator()) { -        DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); +        LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n");          return false;        }      } @@ -279,7 +280,7 @@ bool SSAIfConv::findInsertionPoint() {      --I;      // Some of the conditional code depends in I.      if (InsertAfter.count(&*I)) { -      DEBUG(dbgs() << "Can't insert code after " << *I); +      LLVM_DEBUG(dbgs() << "Can't insert code after " << *I);        return false;      } @@ -313,7 +314,7 @@ bool SSAIfConv::findInsertionPoint() {      // Some of the clobbered registers are live before I, not a valid insertion      // point.      if (!LiveRegUnits.empty()) { -      DEBUG({ +      LLVM_DEBUG({          dbgs() << "Would clobber";          for (SparseSet<unsigned>::const_iterator               i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i) @@ -325,10 +326,10 @@ bool SSAIfConv::findInsertionPoint() {      // This is a valid insertion point.      InsertionPoint = I; -    DEBUG(dbgs() << "Can insert before " << *I); +    LLVM_DEBUG(dbgs() << "Can insert before " << *I);      return true;    } -  DEBUG(dbgs() << "No legal insertion point found.\n"); +  LLVM_DEBUG(dbgs() << "No legal insertion point found.\n");    return false;  } @@ -361,39 +362,39 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {      if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||          Succ1->succ_begin()[0] != Tail)        return false; -    DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> " -                 << printMBBReference(*Succ0) << "/" -                 << printMBBReference(*Succ1) << " -> " -                 << printMBBReference(*Tail) << '\n'); +    LLVM_DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> " +                      << printMBBReference(*Succ0) << "/" +                      << printMBBReference(*Succ1) << " -> " +                      << printMBBReference(*Tail) << '\n');      // Live-in physregs are tricky to get right when speculating code.      
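
The control-flow shapes matched above are easy to restate on a toy CFG. A sketch with stand-in blocks (MBB here is not the LLVM class), returning the shared tail if Head roots a triangle or a diamond:

struct MBB {
  MBB *Succ0 = nullptr, *Succ1 = nullptr; // up to two successors
  unsigned NumPreds = 0;
};

MBB *matchIfShape(MBB &Head) {
  MBB *S0 = Head.Succ0, *S1 = Head.Succ1;
  if (!S0 || !S1)
    return nullptr; // need a conditional branch
  // Triangle: Head -> S0 -> Tail, with Head -> Tail directly.
  if (!S0->Succ1 && S0->Succ0 == S1 && S0->NumPreds == 1)
    return S1;
  if (!S1->Succ1 && S1->Succ0 == S0 && S1->NumPreds == 1)
    return S0;
  // Diamond: both single-entry sides funnel into one Tail.
  if (S0->Succ0 && !S0->Succ1 && !S1->Succ1 && S0->Succ0 == S1->Succ0 &&
      S0->NumPreds == 1 && S1->NumPreds == 1)
    return S0->Succ0;
  return nullptr;
}
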
if (!Tail->livein_empty()) { -      DEBUG(dbgs() << "Tail has live-ins.\n"); +      LLVM_DEBUG(dbgs() << "Tail has live-ins.\n");        return false;      }    } else { -    DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> " -                 << printMBBReference(*Succ0) << " -> " -                 << printMBBReference(*Tail) << '\n'); +    LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> " +                      << printMBBReference(*Succ0) << " -> " +                      << printMBBReference(*Tail) << '\n');    }    // This is a triangle or a diamond.    // If Tail doesn't have any phis, there must be side effects.    if (Tail->empty() || !Tail->front().isPHI()) { -    DEBUG(dbgs() << "No phis in tail.\n"); +    LLVM_DEBUG(dbgs() << "No phis in tail.\n");      return false;    }    // The branch we're looking to eliminate must be analyzable.    Cond.clear();    if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) { -    DEBUG(dbgs() << "Branch not analyzable.\n"); +    LLVM_DEBUG(dbgs() << "Branch not analyzable.\n");      return false;    }    // This is weird, probably some sort of degenerate CFG.    if (!TBB) { -    DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n"); +    LLVM_DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n");      return false;    } @@ -422,7 +423,7 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {      // Get target information.      if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,                                PI.CondCycles, PI.TCycles, PI.FCycles)) { -      DEBUG(dbgs() << "Can't convert: " << *PI.PHI); +      LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI);        return false;      }    } @@ -459,10 +460,10 @@ void SSAIfConv::replacePHIInstrs() {    // Convert all PHIs to select instructions inserted before FirstTerm.    for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {      PHIInfo &PI = PHIs[i]; -    DEBUG(dbgs() << "If-converting " << *PI.PHI); +    LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);      unsigned DstReg = PI.PHI->getOperand(0).getReg();      TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); -    DEBUG(dbgs() << "          --> " << *std::prev(FirstTerm)); +    LLVM_DEBUG(dbgs() << "          --> " << *std::prev(FirstTerm));      PI.PHI->eraseFromParent();      PI.PHI = nullptr;    } @@ -481,7 +482,7 @@ void SSAIfConv::rewritePHIOperands() {      PHIInfo &PI = PHIs[i];      unsigned DstReg = 0; -    DEBUG(dbgs() << "If-converting " << *PI.PHI); +    LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);      if (PI.TReg == PI.FReg) {        // We do not need the select instruction if both incoming values are        // equal. @@ -491,7 +492,7 @@ void SSAIfConv::rewritePHIOperands() {        DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));        TII->insertSelect(*Head, FirstTerm, HeadDL,                           DstReg, Cond, PI.TReg, PI.FReg); -      DEBUG(dbgs() << "          --> " << *std::prev(FirstTerm)); +      LLVM_DEBUG(dbgs() << "          --> " << *std::prev(FirstTerm));      }      // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. 
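
In scalar terms, the PHI rewriting above turns a two-way join into a conditional move once both sides execute unconditionally. A C-level analogy, not MIR:

// Before if-conversion: the join block's PHI picks one of two reaching defs.
int joinViaPhi(bool Cond, int TVal, int FVal) {
  int R;
  if (Cond)
    R = TVal; // value reaching from the true side
  else
    R = FVal; // value reaching from the false side
  return R;   // R plays the role of the PHI
}

// After if-conversion: TII->insertSelect() collapses the PHI to a select.
int joinViaSelect(bool Cond, int TVal, int FVal) {
  return Cond ? TVal : FVal;
}
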
@@ -505,7 +506,7 @@ void SSAIfConv::rewritePHIOperands() {          PI.PHI->RemoveOperand(i-2);        }      } -    DEBUG(dbgs() << "          --> " << *PI.PHI); +    LLVM_DEBUG(dbgs() << "          --> " << *PI.PHI);    }  } @@ -563,8 +564,8 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {    assert(Head->succ_empty() && "Additional head successors?");    if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {      // Splice Tail onto the end of Head. -    DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) << " into head " -                 << printMBBReference(*Head) << '\n'); +    LLVM_DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) +                      << " into head " << printMBBReference(*Head) << '\n');      Head->splice(Head->end(), Tail,                       Tail->begin(), Tail->end());      Head->transferSuccessorsAndUpdatePHIs(Tail); @@ -572,12 +573,12 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {      Tail->eraseFromParent();    } else {      // We need a branch to Tail, let code placement work it out later. -    DEBUG(dbgs() << "Converting to unconditional branch.\n"); +    LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n");      SmallVector<MachineOperand, 0> EmptyCond;      TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);      Head->addSuccessor(Tail);    } -  DEBUG(dbgs() << *Head); +  LLVM_DEBUG(dbgs() << *Head);  } @@ -692,7 +693,7 @@ bool EarlyIfConverter::shouldConvertIf() {    MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());    MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); -  DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); +  LLVM_DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);    unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),                                FBBTrace.getCriticalPath()); @@ -706,10 +707,10 @@ bool EarlyIfConverter::shouldConvertIf() {    if (IfConv.TBB != IfConv.Tail)      ExtraBlocks.push_back(IfConv.TBB);    unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks); -  DEBUG(dbgs() << "Resource length " << ResLength -               << ", minimal critical path " << MinCrit << '\n'); +  LLVM_DEBUG(dbgs() << "Resource length " << ResLength +                    << ", minimal critical path " << MinCrit << '\n');    if (ResLength > MinCrit + CritLimit) { -    DEBUG(dbgs() << "Not enough available ILP.\n"); +    LLVM_DEBUG(dbgs() << "Not enough available ILP.\n");      return false;    } @@ -719,7 +720,7 @@ bool EarlyIfConverter::shouldConvertIf() {    MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);    unsigned BranchDepth =        HeadTrace.getInstrCycles(*IfConv.Head->getFirstTerminator()).Depth; -  DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); +  LLVM_DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');    // Look at all the tail phis, and compute the critical path extension caused    // by inserting select instructions. @@ -728,15 +729,15 @@ bool EarlyIfConverter::shouldConvertIf() {      SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];      unsigned Slack = TailTrace.getInstrSlack(*PI.PHI);      unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth; -    DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); +    LLVM_DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);      // The condition is pulled into the critical path.      
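
The cycle accounting in shouldConvertIf() is easier to follow with numbers plugged in. A worked check under assumed latencies (CritLimit is a stand-in budget here, not the pass's actual default):

#include <algorithm>
#include <cassert>

int main() {
  const unsigned CritLimit = 5;               // assumed budget
  // ILP test: resource length may exceed the shorter critical path a bit.
  const unsigned MinCrit = std::min(12u, 9u); // TBB vs FBB critical paths
  const unsigned ResLength = 13;
  assert(!(ResLength > MinCrit + CritLimit)); // 13 <= 14: proceed

  // Per-PHI test: does the select stretch the critical path too far?
  const unsigned Slack = 2, PHIDepth = 10;
  const unsigned MaxDepth = Slack + PHIDepth; // 12 cycles of headroom
  const unsigned CondDepth = 15;              // branch depth + cond cycles
  if (CondDepth > MaxDepth)
    assert(!(CondDepth - MaxDepth > CritLimit)); // 3 extra <= 5: still OK
  return 0;
}
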
unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);      if (CondDepth > MaxDepth) {        unsigned Extra = CondDepth - MaxDepth; -      DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); +      LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");        if (Extra > CritLimit) { -        DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); +        LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');          return false;        }      } @@ -745,9 +746,9 @@ bool EarlyIfConverter::shouldConvertIf() {      unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(*PI.PHI), PI.TCycles);      if (TDepth > MaxDepth) {        unsigned Extra = TDepth - MaxDepth; -      DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); +      LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");        if (Extra > CritLimit) { -        DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); +        LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');          return false;        }      } @@ -756,9 +757,9 @@ bool EarlyIfConverter::shouldConvertIf() {      unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(*PI.PHI), PI.FCycles);      if (FDepth > MaxDepth) {        unsigned Extra = FDepth - MaxDepth; -      DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); +      LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");        if (Extra > CritLimit) { -        DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); +        LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');          return false;        }      } @@ -783,8 +784,8 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {  }  bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { -  DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" -               << "********** Function: " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" +                    << "********** Function: " << MF.getName() << '\n');    if (skipFunction(MF.getFunction()))      return false; diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp deleted file mode 100644 index 61ec3f4be1dc..000000000000 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ /dev/null @@ -1,755 +0,0 @@ -//===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/ExecutionDepsFix.h" - -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "execution-deps-fix" - -/// Translate TRI register number to a list of indices into our smaller tables -/// of interesting registers. 
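
A sketch of the AliasMap construction this comment refers to, and which regIndices() below consumes: for every architectural register, record which entries of the tracked register class it overlaps. Toy containers instead of MCRegAliasIterator:

#include <vector>

// AliasesOfClassReg[Idx] lists every architectural register overlapping the
// Idx-th register of the tracked class (the register itself included).
std::vector<std::vector<int>>
buildAliasMap(unsigned NumArchRegs,
              const std::vector<std::vector<unsigned>> &AliasesOfClassReg) {
  std::vector<std::vector<int>> AliasMap(NumArchRegs);
  for (unsigned Idx = 0; Idx != AliasesOfClassReg.size(); ++Idx)
    for (unsigned R : AliasesOfClassReg[Idx])
      AliasMap[R].push_back(Idx); // R maps back to class entry #Idx
  return AliasMap;
}
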
-iterator_range<SmallVectorImpl<int>::const_iterator> -ExecutionDepsFix::regIndices(unsigned Reg) const { -  assert(Reg < AliasMap.size() && "Invalid register"); -  const auto &Entry = AliasMap[Reg]; -  return make_range(Entry.begin(), Entry.end()); -} - -DomainValue *ExecutionDepsFix::alloc(int domain) { -  DomainValue *dv = Avail.empty() ? -                      new(Allocator.Allocate()) DomainValue : -                      Avail.pop_back_val(); -  if (domain >= 0) -    dv->addDomain(domain); -  assert(dv->Refs == 0 && "Reference count wasn't cleared"); -  assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); -  return dv; -} - -/// Release a reference to DV.  When the last reference is released, -/// collapse if needed. -void ExecutionDepsFix::release(DomainValue *DV) { -  while (DV) { -    assert(DV->Refs && "Bad DomainValue"); -    if (--DV->Refs) -      return; - -    // There are no more DV references. Collapse any contained instructions. -    if (DV->AvailableDomains && !DV->isCollapsed()) -      collapse(DV, DV->getFirstDomain()); - -    DomainValue *Next = DV->Next; -    DV->clear(); -    Avail.push_back(DV); -    // Also release the next DomainValue in the chain. -    DV = Next; -  } -} - -/// Follow the chain of dead DomainValues until a live DomainValue is reached. -/// Update the referenced pointer when necessary. -DomainValue *ExecutionDepsFix::resolve(DomainValue *&DVRef) { -  DomainValue *DV = DVRef; -  if (!DV || !DV->Next) -    return DV; - -  // DV has a chain. Find the end. -  do DV = DV->Next; -  while (DV->Next); - -  // Update DVRef to point to DV. -  retain(DV); -  release(DVRef); -  DVRef = DV; -  return DV; -} - -/// Set LiveRegs[rx] = dv, updating reference counts. -void ExecutionDepsFix::setLiveReg(int rx, DomainValue *dv) { -  assert(unsigned(rx) < NumRegs && "Invalid index"); -  assert(LiveRegs && "Must enter basic block first."); - -  if (LiveRegs[rx].Value == dv) -    return; -  if (LiveRegs[rx].Value) -    release(LiveRegs[rx].Value); -  LiveRegs[rx].Value = retain(dv); -} - -// Kill register rx, recycle or collapse any DomainValue. -void ExecutionDepsFix::kill(int rx) { -  assert(unsigned(rx) < NumRegs && "Invalid index"); -  assert(LiveRegs && "Must enter basic block first."); -  if (!LiveRegs[rx].Value) -    return; - -  release(LiveRegs[rx].Value); -  LiveRegs[rx].Value = nullptr; -} - -/// Force register rx into domain. -void ExecutionDepsFix::force(int rx, unsigned domain) { -  assert(unsigned(rx) < NumRegs && "Invalid index"); -  assert(LiveRegs && "Must enter basic block first."); -  if (DomainValue *dv = LiveRegs[rx].Value) { -    if (dv->isCollapsed()) -      dv->addDomain(domain); -    else if (dv->hasDomain(domain)) -      collapse(dv, domain); -    else { -      // This is an incompatible open DomainValue. Collapse it to whatever and -      // force the new value into domain. This costs a domain crossing. -      collapse(dv, dv->getFirstDomain()); -      assert(LiveRegs[rx].Value && "Not live after collapse?"); -      LiveRegs[rx].Value->addDomain(domain); -    } -  } else { -    // Set up basic collapsed DomainValue. -    setLiveReg(rx, alloc(domain)); -  } -} - -/// Collapse open DomainValue into given domain. If there are multiple -/// registers using dv, they each get a unique collapsed DomainValue. -void ExecutionDepsFix::collapse(DomainValue *dv, unsigned domain) { -  assert(dv->hasDomain(domain) && "Cannot collapse"); - -  // Collapse all the instructions. 
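
The Next chain walked by resolve() above behaves like a tiny union-find: a dead DomainValue forwards to the value it merged into, and the caller's reference is repointed to the live representative. Stripped of the reference counting:

struct DV {
  DV *Next = nullptr; // forwarding pointer set when this value was merged
};

// Find the live representative and compress the caller's reference.
DV *resolveRef(DV *&Ref) {
  DV *V = Ref;
  while (V && V->Next)
    V = V->Next;
  Ref = V; // subsequent lookups through Ref are O(1)
  return V;
}
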
-  while (!dv->Instrs.empty()) -    TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain); -  dv->setSingleDomain(domain); - -  // If there are multiple users, give them new, unique DomainValues. -  if (LiveRegs && dv->Refs > 1) -    for (unsigned rx = 0; rx != NumRegs; ++rx) -      if (LiveRegs[rx].Value == dv) -        setLiveReg(rx, alloc(domain)); -} - -/// All instructions and registers in B are moved to A, and B is released. -bool ExecutionDepsFix::merge(DomainValue *A, DomainValue *B) { -  assert(!A->isCollapsed() && "Cannot merge into collapsed"); -  assert(!B->isCollapsed() && "Cannot merge from collapsed"); -  if (A == B) -    return true; -  // Restrict to the domains that A and B have in common. -  unsigned common = A->getCommonDomains(B->AvailableDomains); -  if (!common) -    return false; -  A->AvailableDomains = common; -  A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); - -  // Clear the old DomainValue so we won't try to swizzle instructions twice. -  B->clear(); -  // All uses of B are referred to A. -  B->Next = retain(A); - -  for (unsigned rx = 0; rx != NumRegs; ++rx) { -    assert(LiveRegs && "no space allocated for live registers"); -    if (LiveRegs[rx].Value == B) -      setLiveReg(rx, A); -  } -  return true; -} - -/// Set up LiveRegs by merging predecessor live-out values. -void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { -  // Reset instruction counter in each basic block. -  CurInstr = 0; - -  // Set up UndefReads to track undefined register reads. -  UndefReads.clear(); -  LiveRegSet.clear(); - -  // Set up LiveRegs to represent registers entering MBB. -  if (!LiveRegs) -    LiveRegs = new LiveReg[NumRegs]; - -  // Default values are 'nothing happened a long time ago'. -  for (unsigned rx = 0; rx != NumRegs; ++rx) { -    LiveRegs[rx].Value = nullptr; -    LiveRegs[rx].Def = -(1 << 20); -  } - -  // This is the entry block. -  if (MBB->pred_empty()) { -    for (const auto &LI : MBB->liveins()) { -      for (int rx : regIndices(LI.PhysReg)) { -        // Treat function live-ins as if they were defined just before the first -        // instruction.  Usually, function arguments are set up immediately -        // before the call. -        LiveRegs[rx].Def = -1; -      } -    } -    DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n"); -    return; -  } - -  // Try to coalesce live-out registers from predecessors. -  for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), -       pe = MBB->pred_end(); pi != pe; ++pi) { -    auto fi = MBBInfos.find(*pi); -    assert(fi != MBBInfos.end() && -           "Should have pre-allocated MBBInfos for all MBBs"); -    LiveReg *Incoming = fi->second.OutRegs; -    // Incoming is null if this is a backedge from a BB -    // we haven't processed yet -    if (Incoming == nullptr) { -      continue; -    } - -    for (unsigned rx = 0; rx != NumRegs; ++rx) { -      // Use the most recent predecessor def for each register. -      LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, Incoming[rx].Def); - -      DomainValue *pdv = resolve(Incoming[rx].Value); -      if (!pdv) -        continue; -      if (!LiveRegs[rx].Value) { -        setLiveReg(rx, pdv); -        continue; -      } - -      // We have a live DomainValue from more than one predecessor. -      if (LiveRegs[rx].Value->isCollapsed()) { -        // We are already collapsed, but predecessor is not. Force it. 
-        unsigned Domain = LiveRegs[rx].Value->getFirstDomain(); -        if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) -          collapse(pdv, Domain); -        continue; -      } - -      // Currently open, merge in predecessor. -      if (!pdv->isCollapsed()) -        merge(LiveRegs[rx].Value, pdv); -      else -        force(rx, pdv->getFirstDomain()); -    } -  } -  DEBUG( -      dbgs() << printMBBReference(*MBB) -             << (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n")); -} - -void ExecutionDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { -  assert(LiveRegs && "Must enter basic block first."); -  LiveReg *OldOutRegs = MBBInfos[MBB].OutRegs; -  // Save register clearances at end of MBB - used by enterBasicBlock(). -  MBBInfos[MBB].OutRegs = LiveRegs; - -  // While processing the basic block, we kept `Def` relative to the start -  // of the basic block for convenience. However, future use of this information -  // only cares about the clearance from the end of the block, so adjust -  // everything to be relative to the end of the basic block. -  for (unsigned i = 0, e = NumRegs; i != e; ++i) -    LiveRegs[i].Def -= CurInstr; -  if (OldOutRegs) { -    // This must be the second pass. -    // Release all the DomainValues instead of keeping them. -    for (unsigned i = 0, e = NumRegs; i != e; ++i) -      release(OldOutRegs[i].Value); -    delete[] OldOutRegs; -  } -  LiveRegs = nullptr; -} - -bool ExecutionDepsFix::visitInstr(MachineInstr *MI) { -  // Update instructions with explicit execution domains. -  std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI); -  if (DomP.first) { -    if (DomP.second) -      visitSoftInstr(MI, DomP.second); -    else -      visitHardInstr(MI, DomP.first); -  } - -  return !DomP.first; -} - -/// \brief Helps avoid false dependencies on undef registers by updating the -/// machine instructions' undef operand to use a register that the instruction -/// is truly dependent on, or use a register with clearance higher than Pref. -/// Returns true if it was able to find a true dependency, thus not requiring -/// a dependency breaking instruction regardless of clearance. -bool ExecutionDepsFix::pickBestRegisterForUndef(MachineInstr *MI, -                                                unsigned OpIdx, unsigned Pref) { -  MachineOperand &MO = MI->getOperand(OpIdx); -  assert(MO.isUndef() && "Expected undef machine operand"); - -  unsigned OriginalReg = MO.getReg(); - -  // Update only undef operands that are mapped to one register. -  if (AliasMap[OriginalReg].size() != 1) -    return false; - -  // Get the undef operand's register class -  const TargetRegisterClass *OpRC = -      TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF); - -  // If the instruction has a true dependency, we can hide the false depdency -  // behind it. -  for (MachineOperand &CurrMO : MI->operands()) { -    if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() || -        !OpRC->contains(CurrMO.getReg())) -      continue; -    // We found a true dependency - replace the undef register with the true -    // dependency. -    MO.setReg(CurrMO.getReg()); -    return true; -  } - -  // Go over all registers in the register class and find the register with -  // max clearance or clearance higher than Pref. 
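
The scan described here, and implemented just below, amounts to: among the registers of the class, take the first whose clearance beats Pref, else the overall maximum. A self-contained version over a plain last-def table:

#include <vector>

// Defs[R] = index of the last instruction writing R (very negative means
// "written long ago"). Clearance is the distance to the current instruction.
unsigned pickBestUndefReg(const std::vector<int> &Defs, unsigned CurInstr,
                          unsigned Pref, unsigned OriginalReg) {
  unsigned BestReg = OriginalReg;
  unsigned BestClearance = 0;
  for (unsigned R = 0; R != Defs.size(); ++R) {
    unsigned Clearance = CurInstr - Defs[R];
    if (Clearance <= BestClearance)
      continue;
    BestClearance = Clearance;
    BestReg = R;
    if (BestClearance > Pref)
      break; // good enough; stop early, as the pass does
  }
  return BestReg;
}
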
-  unsigned MaxClearance = 0; -  unsigned MaxClearanceReg = OriginalReg; -  ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC); -  for (auto Reg : Order) { -    assert(AliasMap[Reg].size() == 1 && -           "Reg is expected to be mapped to a single index"); -    int RCrx = *regIndices(Reg).begin(); -    unsigned Clearance = CurInstr - LiveRegs[RCrx].Def; -    if (Clearance <= MaxClearance) -      continue; -    MaxClearance = Clearance; -    MaxClearanceReg = Reg; - -    if (MaxClearance > Pref) -      break; -  } - -  // Update the operand if we found a register with better clearance. -  if (MaxClearanceReg != OriginalReg) -    MO.setReg(MaxClearanceReg); - -  return false; -} - -/// \brief Return true to if it makes sense to break dependence on a partial def -/// or undef use. -bool ExecutionDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, -                                             unsigned Pref) { -  unsigned reg = MI->getOperand(OpIdx).getReg(); -  for (int rx : regIndices(reg)) { -    unsigned Clearance = CurInstr - LiveRegs[rx].Def; -    DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); - -    if (Pref > Clearance) { -      DEBUG(dbgs() << ": Break dependency.\n"); -      continue; -    } -    DEBUG(dbgs() << ": OK .\n"); -    return false; -  } -  return true; -} - -// Update def-ages for registers defined by MI. -// If Kill is set, also kill off DomainValues clobbered by the defs. -// -// Also break dependencies on partial defs and undef uses. -void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency, -                                   bool Kill) { -  assert(!MI->isDebugValue() && "Won't process debug values"); - -  // Break dependence on undef uses. Do this before updating LiveRegs below. -  unsigned OpNum; -  if (breakDependency) { -    unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); -    if (Pref) { -      bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref); -      // We don't need to bother trying to break a dependency if this -      // instruction has a true dependency on that register through another -      // operand - we'll have to wait for it to be available regardless. -      if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref)) -        UndefReads.push_back(std::make_pair(MI, OpNum)); -    } -  } -  const MCInstrDesc &MCID = MI->getDesc(); -  for (unsigned i = 0, -         e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); -         i != e; ++i) { -    MachineOperand &MO = MI->getOperand(i); -    if (!MO.isReg()) -      continue; -    if (MO.isUse()) -      continue; -    for (int rx : regIndices(MO.getReg())) { -      // This instruction explicitly defines rx. -      DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << CurInstr -                   << '\t' << *MI); - -      if (breakDependency) { -        // Check clearance before partial register updates. -        // Call breakDependence before setting LiveRegs[rx].Def. -        unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI); -        if (Pref && shouldBreakDependence(MI, i, Pref)) -          TII->breakPartialRegDependency(*MI, i, TRI); -      } - -      // How many instructions since rx was last written? -      LiveRegs[rx].Def = CurInstr; - -      // Kill off domains redefined by generic instructions. -      if (Kill) -        kill(rx); -    } -  } -  ++CurInstr; -} - -/// \break Break false dependencies on undefined register reads. 
-/// -/// Walk the block backward computing precise liveness. This is expensive, so we -/// only do it on demand. Note that the occurrence of undefined register reads -/// that should be broken is very rare, but when they occur we may have many in -/// a single block. -void ExecutionDepsFix::processUndefReads(MachineBasicBlock *MBB) { -  if (UndefReads.empty()) -    return; - -  // Collect this block's live out register units. -  LiveRegSet.init(*TRI); -  // We do not need to care about pristine registers as they are just preserved -  // but not actually used in the function. -  LiveRegSet.addLiveOutsNoPristines(*MBB); - -  MachineInstr *UndefMI = UndefReads.back().first; -  unsigned OpIdx = UndefReads.back().second; - -  for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { -    // Update liveness, including the current instruction's defs. -    LiveRegSet.stepBackward(I); - -    if (UndefMI == &I) { -      if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) -        TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI); - -      UndefReads.pop_back(); -      if (UndefReads.empty()) -        return; - -      UndefMI = UndefReads.back().first; -      OpIdx = UndefReads.back().second; -    } -  } -} - -// A hard instruction only works in one domain. All input registers will be -// forced into that domain. -void ExecutionDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { -  // Collapse all uses. -  for (unsigned i = mi->getDesc().getNumDefs(), -                e = mi->getDesc().getNumOperands(); i != e; ++i) { -    MachineOperand &mo = mi->getOperand(i); -    if (!mo.isReg()) continue; -    for (int rx : regIndices(mo.getReg())) { -      force(rx, domain); -    } -  } - -  // Kill all defs and force them. -  for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { -    MachineOperand &mo = mi->getOperand(i); -    if (!mo.isReg()) continue; -    for (int rx : regIndices(mo.getReg())) { -      kill(rx); -      force(rx, domain); -    } -  } -} - -// A soft instruction can be changed to work in other domains given by mask. -void ExecutionDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { -  // Bitmask of available domains for this instruction after taking collapsed -  // operands into account. -  unsigned available = mask; - -  // Scan the explicit use operands for incoming domains. -  SmallVector<int, 4> used; -  if (LiveRegs) -    for (unsigned i = mi->getDesc().getNumDefs(), -                  e = mi->getDesc().getNumOperands(); i != e; ++i) { -      MachineOperand &mo = mi->getOperand(i); -      if (!mo.isReg()) continue; -      for (int rx : regIndices(mo.getReg())) { -        DomainValue *dv = LiveRegs[rx].Value; -        if (dv == nullptr) -          continue; -        // Bitmask of domains that dv and available have in common. -        unsigned common = dv->getCommonDomains(available); -        // Is it possible to use this collapsed register for free? -        if (dv->isCollapsed()) { -          // Restrict available domains to the ones in common with the operand. -          // If there are no common domains, we must pay the cross-domain -          // penalty for this operand. -          if (common) available = common; -        } else if (common) -          // Open DomainValue is compatible, save it for merging. -          used.push_back(rx); -        else -          // Open DomainValue is not compatible with instruction. It is useless -          // now. 
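
The domain negotiation in visitSoftInstr() above is bitmask algebra: an open DomainValue carries the set of domains all of its instructions could still execute in, merging intersects the sets, and an empty intersection means paying a domain crossing instead. In miniature:

#include <cstdint>

struct SoftInstr {
  uint32_t Available; // bit i set => domain i still possible

  // Intersect with a collapsed operand's domains when compatible; otherwise
  // leave Available alone and accept a domain-crossing penalty.
  void meetCollapsedOperand(uint32_t OpDomains) {
    if (uint32_t Common = Available & OpDomains)
      Available = Common;
  }

  // A single surviving bit forces the whole group into that domain.
  bool forcedToSingleDomain() const {
    return Available && (Available & (Available - 1)) == 0;
  }
};
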
-          kill(rx); -      } -    } - -  // If the collapsed operands force a single domain, propagate the collapse. -  if (isPowerOf2_32(available)) { -    unsigned domain = countTrailingZeros(available); -    TII->setExecutionDomain(*mi, domain); -    visitHardInstr(mi, domain); -    return; -  } - -  // Kill off any remaining uses that don't match available, and build a list of -  // incoming DomainValues that we want to merge. -  SmallVector<const LiveReg *, 4> Regs; -  for (int rx : used) { -    assert(LiveRegs && "no space allocated for live registers"); -    const LiveReg &LR = LiveRegs[rx]; -    // This useless DomainValue could have been missed above. -    if (!LR.Value->getCommonDomains(available)) { -      kill(rx); -      continue; -    } -    // Sorted insertion. -    auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR, -                              [](const LiveReg *LHS, const LiveReg *RHS) { -                                return LHS->Def < RHS->Def; -                              }); -    Regs.insert(I, &LR); -  } - -  // doms are now sorted in order of appearance. Try to merge them all, giving -  // priority to the latest ones. -  DomainValue *dv = nullptr; -  while (!Regs.empty()) { -    if (!dv) { -      dv = Regs.pop_back_val()->Value; -      // Force the first dv to match the current instruction. -      dv->AvailableDomains = dv->getCommonDomains(available); -      assert(dv->AvailableDomains && "Domain should have been filtered"); -      continue; -    } - -    DomainValue *Latest = Regs.pop_back_val()->Value; -    // Skip already merged values. -    if (Latest == dv || Latest->Next) -      continue; -    if (merge(dv, Latest)) -      continue; - -    // If latest didn't merge, it is useless now. Kill all registers using it. -    for (int i : used) { -      assert(LiveRegs && "no space allocated for live registers"); -      if (LiveRegs[i].Value == Latest) -        kill(i); -    } -  } - -  // dv is the DomainValue we are going to use for this instruction. -  if (!dv) { -    dv = alloc(); -    dv->AvailableDomains = available; -  } -  dv->Instrs.push_back(mi); - -  // Finally set all defs and non-collapsed uses to dv. We must iterate through -  // all the operators, including imp-def ones. -  for (MachineInstr::mop_iterator ii = mi->operands_begin(), -                                  ee = mi->operands_end(); -                                  ii != ee; ++ii) { -    MachineOperand &mo = *ii; -    if (!mo.isReg()) continue; -    for (int rx : regIndices(mo.getReg())) { -      if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { -        kill(rx); -        setLiveReg(rx, dv); -      } -    } -  } -} - -void ExecutionDepsFix::processBasicBlock(MachineBasicBlock *MBB, -                                         bool PrimaryPass) { -  enterBasicBlock(MBB); -  // If this block is not done, it makes little sense to make any decisions -  // based on clearance information. We need to make a second pass anyway, -  // and by then we'll have better information, so we can avoid doing the work -  // to try and break dependencies now. 
-  bool breakDependency = isBlockDone(MBB); -  for (MachineInstr &MI : *MBB) { -    if (!MI.isDebugValue()) { -      bool Kill = false; -      if (PrimaryPass) -        Kill = visitInstr(&MI); -      processDefs(&MI, breakDependency, Kill); -    } -  } -  if (breakDependency) -    processUndefReads(MBB); -  leaveBasicBlock(MBB); -} - -bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) { -  return MBBInfos[MBB].PrimaryCompleted && -         MBBInfos[MBB].IncomingCompleted == MBBInfos[MBB].PrimaryIncoming && -         MBBInfos[MBB].IncomingProcessed == MBB->pred_size(); -} - -bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) { -  if (skipFunction(mf.getFunction())) -    return false; -  MF = &mf; -  TII = MF->getSubtarget().getInstrInfo(); -  TRI = MF->getSubtarget().getRegisterInfo(); -  RegClassInfo.runOnMachineFunction(mf); -  LiveRegs = nullptr; -  assert(NumRegs == RC->getNumRegs() && "Bad regclass"); - -  DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " -               << TRI->getRegClassName(RC) << " **********\n"); - -  // If no relevant registers are used in the function, we can skip it -  // completely. -  bool anyregs = false; -  const MachineRegisterInfo &MRI = mf.getRegInfo(); -  for (unsigned Reg : *RC) { -    if (MRI.isPhysRegUsed(Reg)) { -      anyregs = true; -      break; -    } -  } -  if (!anyregs) return false; - -  // Initialize the AliasMap on the first use. -  if (AliasMap.empty()) { -    // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and -    // therefore the LiveRegs array. -    AliasMap.resize(TRI->getNumRegs()); -    for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) -      for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); -           AI.isValid(); ++AI) -        AliasMap[*AI].push_back(i); -  } - -  // Initialize the MMBInfos -  for (auto &MBB : mf) { -    MBBInfo InitialInfo; -    MBBInfos.insert(std::make_pair(&MBB, InitialInfo)); -  } - -  /* -   *  We want to visit every instruction in every basic block in order to update -   *  it's execution domain or break any false dependencies. However, for the -   *  dependency breaking, we need to know clearances from all predecessors -   *  (including any backedges). One way to do so would be to do two complete -   *  passes over all basic blocks/instructions, the first for recording -   *  clearances, the second to break the dependencies. However, for functions -   *  without backedges, or functions with a lot of straight-line code, and -   *  a small loop, that would be a lot of unnecessary work (since only the -   *  BBs that are part of the loop require two passes). As an example, -   *  consider the following loop. -   * -   * -   *     PH -> A -> B (xmm<Undef> -> xmm<Def>) -> C -> D -> EXIT -   *           ^                                  | -   *           +----------------------------------+ -   * -   *  The iteration order is as follows: -   *  Naive: PH A B C D A' B' C' D' -   *  Optimized: PH A B C A' B' C' D -   * -   *  Note that we avoid processing D twice, because we can entirely process -   *  the predecessors before getting to D. We call a block that is ready -   *  for its second round of processing `done` (isBlockDone). Once we finish -   *  processing some block, we update the counters in MBBInfos and re-process -   *  any successors that are now done. 
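
The traversal sketched in this comment boils down to three per-block counters plus the done test used by isBlockDone() above. A skeleton with the same fields:

struct BBInfo {
  bool PrimaryCompleted = false;  // visited once in reverse post-order
  unsigned IncomingProcessed = 0; // preds visited at least once
  unsigned IncomingCompleted = 0; // preds fully done
  unsigned PrimaryIncoming = 0;   // IncomingProcessed at the primary visit
  unsigned NumPreds = 0;
};

// A block is "done" when nothing that could still change its clearances is
// outstanding: all preds processed, and everything already processed at the
// primary visit has since completed.
bool isDone(const BBInfo &I) {
  return I.PrimaryCompleted && I.IncomingCompleted == I.PrimaryIncoming &&
         I.IncomingProcessed == I.NumPreds;
}
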
-   */ - -  MachineBasicBlock *Entry = &*MF->begin(); -  ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); -  SmallVector<MachineBasicBlock *, 4> Workqueue; -  for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator -         MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { -    MachineBasicBlock *MBB = *MBBI; -    // N.B: IncomingProcessed and IncomingCompleted were already updated while -    // processing this block's predecessors. -    MBBInfos[MBB].PrimaryCompleted = true; -    MBBInfos[MBB].PrimaryIncoming = MBBInfos[MBB].IncomingProcessed; -    bool Primary = true; -    Workqueue.push_back(MBB); -    while (!Workqueue.empty()) { -      MachineBasicBlock *ActiveMBB = &*Workqueue.back(); -      Workqueue.pop_back(); -      processBasicBlock(ActiveMBB, Primary); -      bool Done = isBlockDone(ActiveMBB); -      for (auto *Succ : ActiveMBB->successors()) { -        if (!isBlockDone(Succ)) { -          if (Primary) { -            MBBInfos[Succ].IncomingProcessed++; -          } -          if (Done) { -            MBBInfos[Succ].IncomingCompleted++; -          } -          if (isBlockDone(Succ)) { -            Workqueue.push_back(Succ); -          } -        } -      } -      Primary = false; -    } -  } - -  // We need to go through again and finalize any blocks that are not done yet. -  // This is possible if blocks have dead predecessors, so we didn't visit them -  // above. -  for (ReversePostOrderTraversal<MachineBasicBlock *>::rpo_iterator -           MBBI = RPOT.begin(), -           MBBE = RPOT.end(); -       MBBI != MBBE; ++MBBI) { -    MachineBasicBlock *MBB = *MBBI; -    if (!isBlockDone(MBB)) { -      processBasicBlock(MBB, false); -      // Don't update successors here. We'll get to them anyway through this -      // loop. -    } -  } - -  // Clear the LiveOuts vectors and collapse any remaining DomainValues. -  for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator -         MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { -    auto FI = MBBInfos.find(*MBBI); -    if (FI == MBBInfos.end() || !FI->second.OutRegs) -      continue; -    for (unsigned i = 0, e = NumRegs; i != e; ++i) -      if (FI->second.OutRegs[i].Value) -        release(FI->second.OutRegs[i].Value); -    delete[] FI->second.OutRegs; -  } -  MBBInfos.clear(); -  UndefReads.clear(); -  Avail.clear(); -  Allocator.DestroyAll(); - -  return false; -} diff --git a/contrib/llvm/lib/CodeGen/ExecutionDomainFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDomainFix.cpp new file mode 100644 index 000000000000..458dcf2b0e26 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ExecutionDomainFix.cpp @@ -0,0 +1,473 @@ +//===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ExecutionDomainFix.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "execution-deps-fix" + +iterator_range<SmallVectorImpl<int>::const_iterator> +ExecutionDomainFix::regIndices(unsigned Reg) const { +  assert(Reg < AliasMap.size() && "Invalid register"); +  const auto &Entry = AliasMap[Reg]; +  return make_range(Entry.begin(), Entry.end()); +} + +DomainValue *ExecutionDomainFix::alloc(int domain) { +  DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue +                                  : Avail.pop_back_val(); +  if (domain >= 0) +    dv->addDomain(domain); +  assert(dv->Refs == 0 && "Reference count wasn't cleared"); +  assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); +  return dv; +} + +void ExecutionDomainFix::release(DomainValue *DV) { +  while (DV) { +    assert(DV->Refs && "Bad DomainValue"); +    if (--DV->Refs) +      return; + +    // There are no more DV references. Collapse any contained instructions. +    if (DV->AvailableDomains && !DV->isCollapsed()) +      collapse(DV, DV->getFirstDomain()); + +    DomainValue *Next = DV->Next; +    DV->clear(); +    Avail.push_back(DV); +    // Also release the next DomainValue in the chain. +    DV = Next; +  } +} + +DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) { +  DomainValue *DV = DVRef; +  if (!DV || !DV->Next) +    return DV; + +  // DV has a chain. Find the end. +  do +    DV = DV->Next; +  while (DV->Next); + +  // Update DVRef to point to DV. +  retain(DV); +  release(DVRef); +  DVRef = DV; +  return DV; +} + +void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) { +  assert(unsigned(rx) < NumRegs && "Invalid index"); +  assert(!LiveRegs.empty() && "Must enter basic block first."); + +  if (LiveRegs[rx] == dv) +    return; +  if (LiveRegs[rx]) +    release(LiveRegs[rx]); +  LiveRegs[rx] = retain(dv); +} + +void ExecutionDomainFix::kill(int rx) { +  assert(unsigned(rx) < NumRegs && "Invalid index"); +  assert(!LiveRegs.empty() && "Must enter basic block first."); +  if (!LiveRegs[rx]) +    return; + +  release(LiveRegs[rx]); +  LiveRegs[rx] = nullptr; +} + +void ExecutionDomainFix::force(int rx, unsigned domain) { +  assert(unsigned(rx) < NumRegs && "Invalid index"); +  assert(!LiveRegs.empty() && "Must enter basic block first."); +  if (DomainValue *dv = LiveRegs[rx]) { +    if (dv->isCollapsed()) +      dv->addDomain(domain); +    else if (dv->hasDomain(domain)) +      collapse(dv, domain); +    else { +      // This is an incompatible open DomainValue. Collapse it to whatever and +      // force the new value into domain. This costs a domain crossing. +      collapse(dv, dv->getFirstDomain()); +      assert(LiveRegs[rx] && "Not live after collapse?"); +      LiveRegs[rx]->addDomain(domain); +    } +  } else { +    // Set up basic collapsed DomainValue. +    setLiveReg(rx, alloc(domain)); +  } +} + +void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) { +  assert(dv->hasDomain(domain) && "Cannot collapse"); + +  // Collapse all the instructions. +  while (!dv->Instrs.empty()) +    TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain); +  dv->setSingleDomain(domain); + +  // If there are multiple users, give them new, unique DomainValues. 
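The retain/release discipline above threads merged DomainValues together through Next, so a later resolve() can forward a stale pointer to the surviving representative in O(1) on subsequent lookups (the loop that follows this comment then gives each remaining user its own value). A reduced sketch of that pattern, with plain new/delete instead of the pass's recycling allocator and no domain bits (Node, retain, release, resolve are illustrative names, not the pass's interface):

    #include <cassert>
    #include <cstdio>

    struct Node {
      unsigned Refs = 0;
      Node *Next = nullptr; // set when this value is merged into another
    };

    static Node *retain(Node *N) { ++N->Refs; return N; }

    static void release(Node *N) {
      while (N) {
        assert(N->Refs && "double release");
        if (--N->Refs)
          return;
        Node *Next = N->Next; // a dying node drops its chain reference too
        delete N;
        N = Next;
      }
    }

    // Follow the merge chain to the live representative, then repoint the
    // reference so the next lookup is constant time.
    static Node *resolve(Node *&Ref) {
      Node *N = Ref;
      if (!N || !N->Next)
        return N;
      do
        N = N->Next;
      while (N->Next);
      retain(N);
      release(Ref);
      Ref = N;
      return N;
    }

    int main() {
      Node *A = retain(new Node); // one external reference each
      Node *B = retain(new Node);
      B->Next = retain(A);        // "merge" B into A, as merge() does

      Node *Ref = retain(B);      // a stale reference, e.g. a LiveRegs slot
      Node *Rep = resolve(Ref);   // forwards Ref from B to A
      std::printf("resolved to A? %s\n", Rep == A ? "yes" : "no");

      release(Ref); // the forwarded LiveRegs slot
      release(B);   // last direct ref to B; also drops B's ref to A
      release(A);   // last ref to A
    }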
+  if (!LiveRegs.empty() && dv->Refs > 1) +    for (unsigned rx = 0; rx != NumRegs; ++rx) +      if (LiveRegs[rx] == dv) +        setLiveReg(rx, alloc(domain)); +} + +bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) { +  assert(!A->isCollapsed() && "Cannot merge into collapsed"); +  assert(!B->isCollapsed() && "Cannot merge from collapsed"); +  if (A == B) +    return true; +  // Restrict to the domains that A and B have in common. +  unsigned common = A->getCommonDomains(B->AvailableDomains); +  if (!common) +    return false; +  A->AvailableDomains = common; +  A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); + +  // Clear the old DomainValue so we won't try to swizzle instructions twice. +  B->clear(); +  // All uses of B are referred to A. +  B->Next = retain(A); + +  for (unsigned rx = 0; rx != NumRegs; ++rx) { +    assert(!LiveRegs.empty() && "no space allocated for live registers"); +    if (LiveRegs[rx] == B) +      setLiveReg(rx, A); +  } +  return true; +} + +void ExecutionDomainFix::enterBasicBlock( +    const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + +  MachineBasicBlock *MBB = TraversedMBB.MBB; + +  // Set up LiveRegs to represent registers entering MBB. +  // Set default domain values to 'no domain' (nullptr) +  if (LiveRegs.empty()) +    LiveRegs.assign(NumRegs, nullptr); + +  // This is the entry block. +  if (MBB->pred_empty()) { +    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n"); +    return; +  } + +  // Try to coalesce live-out registers from predecessors. +  for (MachineBasicBlock *pred : MBB->predecessors()) { +    assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && +           "Should have pre-allocated MBBInfos for all MBBs"); +    LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()]; +    // Incoming is null if this is a backedge from a BB +    // we haven't processed yet +    if (Incoming.empty()) +      continue; + +    for (unsigned rx = 0; rx != NumRegs; ++rx) { +      DomainValue *pdv = resolve(Incoming[rx]); +      if (!pdv) +        continue; +      if (!LiveRegs[rx]) { +        setLiveReg(rx, pdv); +        continue; +      } + +      // We have a live DomainValue from more than one predecessor. +      if (LiveRegs[rx]->isCollapsed()) { +        // We are already collapsed, but predecessor is not. Force it. +        unsigned Domain = LiveRegs[rx]->getFirstDomain(); +        if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) +          collapse(pdv, Domain); +        continue; +      } + +      // Currently open, merge in predecessor. +      if (!pdv->isCollapsed()) +        merge(LiveRegs[rx], pdv); +      else +        force(rx, pdv->getFirstDomain()); +    } +  } +  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) +                    << (!TraversedMBB.IsDone ? ": incomplete\n" +                                             : ": all preds known\n")); +} + +void ExecutionDomainFix::leaveBasicBlock( +    const LoopTraversal::TraversedMBBInfo &TraversedMBB) { +  assert(!LiveRegs.empty() && "Must enter basic block first."); +  unsigned MBBNumber = TraversedMBB.MBB->getNumber(); +  assert(MBBNumber < MBBOutRegsInfos.size() && +         "Unexpected basic block number."); +  // Save register clearances at end of MBB - used by enterBasicBlock(). 
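enterBasicBlock() above coalesces one DomainValue per register across all predecessors, and merge() only succeeds while the two values still share a domain. Since AvailableDomains is a bitmask, that compatibility test is a single AND; a toy illustration (the mask values and the domain numbering are hypothetical, not any real target's encoding):

    #include <cstdint>
    #include <cstdio>

    // One bit per execution domain (say bit 0 = integer, bit 1 = float,
    // bit 2 = vector; purely illustrative numbering).
    using DomainMask = std::uint8_t;

    static DomainMask commonDomains(DomainMask A, DomainMask B) { return A & B; }

    int main() {
      DomainMask FromPredA = 0b110; // executable in float or vector units
      DomainMask FromPredB = 0b011; // executable in integer or float units
      DomainMask Merged = commonDomains(FromPredA, FromPredB);
      if (Merged)
        std::printf("merge ok, remaining domains mask = 0x%x\n",
                    unsigned(Merged));
      else
        std::printf("no common domain: collapse one side first\n");
    }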
+  for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) { +    release(OldLiveReg); +  } +  MBBOutRegsInfos[MBBNumber] = LiveRegs; +  LiveRegs.clear(); +} + +bool ExecutionDomainFix::visitInstr(MachineInstr *MI) { +  // Update instructions with explicit execution domains. +  std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI); +  if (DomP.first) { +    if (DomP.second) +      visitSoftInstr(MI, DomP.second); +    else +      visitHardInstr(MI, DomP.first); +  } + +  return !DomP.first; +} + +void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) { +  assert(!MI->isDebugInstr() && "Won't process debug values"); +  const MCInstrDesc &MCID = MI->getDesc(); +  for (unsigned i = 0, +                e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); +       i != e; ++i) { +    MachineOperand &MO = MI->getOperand(i); +    if (!MO.isReg()) +      continue; +    if (MO.isUse()) +      continue; +    for (int rx : regIndices(MO.getReg())) { +      // This instruction explicitly defines rx. +      LLVM_DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI); + +      // Kill off domains redefined by generic instructions. +      if (Kill) +        kill(rx); +    } +  } +} + +void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) { +  // Collapse all uses. +  for (unsigned i = mi->getDesc().getNumDefs(), +                e = mi->getDesc().getNumOperands(); +       i != e; ++i) { +    MachineOperand &mo = mi->getOperand(i); +    if (!mo.isReg()) +      continue; +    for (int rx : regIndices(mo.getReg())) { +      force(rx, domain); +    } +  } + +  // Kill all defs and force them. +  for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { +    MachineOperand &mo = mi->getOperand(i); +    if (!mo.isReg()) +      continue; +    for (int rx : regIndices(mo.getReg())) { +      kill(rx); +      force(rx, domain); +    } +  } +} + +void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { +  // Bitmask of available domains for this instruction after taking collapsed +  // operands into account. +  unsigned available = mask; + +  // Scan the explicit use operands for incoming domains. +  SmallVector<int, 4> used; +  if (!LiveRegs.empty()) +    for (unsigned i = mi->getDesc().getNumDefs(), +                  e = mi->getDesc().getNumOperands(); +         i != e; ++i) { +      MachineOperand &mo = mi->getOperand(i); +      if (!mo.isReg()) +        continue; +      for (int rx : regIndices(mo.getReg())) { +        DomainValue *dv = LiveRegs[rx]; +        if (dv == nullptr) +          continue; +        // Bitmask of domains that dv and available have in common. +        unsigned common = dv->getCommonDomains(available); +        // Is it possible to use this collapsed register for free? +        if (dv->isCollapsed()) { +          // Restrict available domains to the ones in common with the operand. +          // If there are no common domains, we must pay the cross-domain +          // penalty for this operand. +          if (common) +            available = common; +        } else if (common) +          // Open DomainValue is compatible, save it for merging. +          used.push_back(rx); +        else +          // Open DomainValue is not compatible with instruction. It is useless +          // now. +          kill(rx); +      } +    } + +  // If the collapsed operands force a single domain, propagate the collapse. 
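The check that follows this comment relies on the available-domain mask having exactly one bit left: isPowerOf2_32 detects that, and countTrailingZeros turns the bit back into a domain index. The same idiom with hand-rolled helpers standing in for LLVM's MathExtras functions:

    #include <cstdint>
    #include <cstdio>

    static bool isPowerOf2(std::uint32_t X) { return X && !(X & (X - 1)); }

    // Only called when X is known nonzero (guarded by isPowerOf2 above).
    static unsigned countTrailingZeros(std::uint32_t X) {
      unsigned N = 0;
      while (!(X & 1)) {
        X >>= 1;
        ++N;
      }
      return N;
    }

    int main() {
      std::uint32_t Available = 0b100; // only domain 2 survived filtering
      if (isPowerOf2(Available))
        std::printf("forced into domain %u\n", countTrailingZeros(Available));
    }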
+  if (isPowerOf2_32(available)) { +    unsigned domain = countTrailingZeros(available); +    TII->setExecutionDomain(*mi, domain); +    visitHardInstr(mi, domain); +    return; +  } + +  // Kill off any remaining uses that don't match available, and build a list of +  // incoming DomainValues that we want to merge. +  SmallVector<int, 4> Regs; +  for (int rx : used) { +    assert(!LiveRegs.empty() && "no space allocated for live registers"); +    DomainValue *&LR = LiveRegs[rx]; +    // This useless DomainValue could have been missed above. +    if (!LR->getCommonDomains(available)) { +      kill(rx); +      continue; +    } +    // Sorted insertion. +    // Enables giving priority to the latest domains during merging. +    auto I = std::upper_bound( +        Regs.begin(), Regs.end(), rx, [&](int LHS, const int RHS) { +          return RDA->getReachingDef(mi, RC->getRegister(LHS)) < +                 RDA->getReachingDef(mi, RC->getRegister(RHS)); +        }); +    Regs.insert(I, rx); +  } + +  // doms are now sorted in order of appearance. Try to merge them all, giving +  // priority to the latest ones. +  DomainValue *dv = nullptr; +  while (!Regs.empty()) { +    if (!dv) { +      dv = LiveRegs[Regs.pop_back_val()]; +      // Force the first dv to match the current instruction. +      dv->AvailableDomains = dv->getCommonDomains(available); +      assert(dv->AvailableDomains && "Domain should have been filtered"); +      continue; +    } + +    DomainValue *Latest = LiveRegs[Regs.pop_back_val()]; +    // Skip already merged values. +    if (Latest == dv || Latest->Next) +      continue; +    if (merge(dv, Latest)) +      continue; + +    // If latest didn't merge, it is useless now. Kill all registers using it. +    for (int i : used) { +      assert(!LiveRegs.empty() && "no space allocated for live registers"); +      if (LiveRegs[i] == Latest) +        kill(i); +    } +  } + +  // dv is the DomainValue we are going to use for this instruction. +  if (!dv) { +    dv = alloc(); +    dv->AvailableDomains = available; +  } +  dv->Instrs.push_back(mi); + +  // Finally set all defs and non-collapsed uses to dv. We must iterate through +  // all the operators, including imp-def ones. +  for (MachineOperand &mo : mi->operands()) { +    if (!mo.isReg()) +      continue; +    for (int rx : regIndices(mo.getReg())) { +      if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) { +        kill(rx); +        setLiveReg(rx, dv); +      } +    } +  } +} + +void ExecutionDomainFix::processBasicBlock( +    const LoopTraversal::TraversedMBBInfo &TraversedMBB) { +  enterBasicBlock(TraversedMBB); +  // If this block is not done, it makes little sense to make any decisions +  // based on clearance information. We need to make a second pass anyway, +  // and by then we'll have better information, so we can avoid doing the work +  // to try and break dependencies now. 
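The sorted-insertion step above keeps Regs ordered by how recently each register was defined, so the merge loop can pop the most recent candidate first. A reduced version of that std::upper_bound insert, with a plain array of made-up "def distance" values standing in for ReachingDefAnalysis:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      // ReachingDef[r] = instruction index of r's latest def (made-up).
      int ReachingDef[] = {40, 10, 30, 20};

      std::vector<int> Regs; // register indices, sorted by ReachingDef
      for (int rx : {0, 1, 2, 3}) {
        auto I = std::upper_bound(Regs.begin(), Regs.end(), rx,
                                  [&](int LHS, int RHS) {
                                    return ReachingDef[LHS] < ReachingDef[RHS];
                                  });
        Regs.insert(I, rx);
      }

      // pop_back order: r0 (def at 40), r2 (30), r3 (20), r1 (10);
      // the latest definition is merged first, as the pass intends.
      while (!Regs.empty()) {
        std::printf("merge candidate r%d (def at %d)\n", Regs.back(),
                    ReachingDef[Regs.back()]);
        Regs.pop_back();
      }
    }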
+  for (MachineInstr &MI : *TraversedMBB.MBB) { +    if (!MI.isDebugInstr()) { +      bool Kill = false; +      if (TraversedMBB.PrimaryPass) +        Kill = visitInstr(&MI); +      processDefs(&MI, Kill); +    } +  } +  leaveBasicBlock(TraversedMBB); +} + +bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) { +  if (skipFunction(mf.getFunction())) +    return false; +  MF = &mf; +  TII = MF->getSubtarget().getInstrInfo(); +  TRI = MF->getSubtarget().getRegisterInfo(); +  LiveRegs.clear(); +  assert(NumRegs == RC->getNumRegs() && "Bad regclass"); + +  LLVM_DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: " +                    << TRI->getRegClassName(RC) << " **********\n"); + +  // If no relevant registers are used in the function, we can skip it +  // completely. +  bool anyregs = false; +  const MachineRegisterInfo &MRI = mf.getRegInfo(); +  for (unsigned Reg : *RC) { +    if (MRI.isPhysRegUsed(Reg)) { +      anyregs = true; +      break; +    } +  } +  if (!anyregs) +    return false; + +  RDA = &getAnalysis<ReachingDefAnalysis>(); + +  // Initialize the AliasMap on the first use. +  if (AliasMap.empty()) { +    // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and +    // therefore the LiveRegs array. +    AliasMap.resize(TRI->getNumRegs()); +    for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) +      for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid(); +           ++AI) +        AliasMap[*AI].push_back(i); +  } + +  // Initialize the MBBOutRegsInfos +  MBBOutRegsInfos.resize(mf.getNumBlockIDs()); + +  // Traverse the basic blocks. +  LoopTraversal Traversal; +  LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf); +  for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) { +    processBasicBlock(TraversedMBB); +  } + +  for (LiveRegsDVInfo OutLiveRegs : MBBOutRegsInfos) { +    for (DomainValue *OutLiveReg : OutLiveRegs) { +      if (OutLiveReg) +        release(OutLiveReg); +    } +  } +  MBBOutRegsInfos.clear(); +  Avail.clear(); +  Allocator.DestroyAll(); + +  return false; +} diff --git a/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp index 09c808463a41..d7562cbf1e90 100644 --- a/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -32,7 +32,7 @@ STATISTIC(NumMemCmpGreaterThanMax,            "Number of memcmp calls with size greater than max size");  STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); -static cl::opt<unsigned> MemCmpNumLoadsPerBlock( +static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock(      "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),      cl::desc("The number of loads per basic block for inline expansion of "               "memcmp that is only being compared against zero.")); @@ -56,7 +56,7 @@ class MemCmpExpansion {    const uint64_t Size;    unsigned MaxLoadSize;    uint64_t NumLoadsNonOneByte; -  const uint64_t NumLoadsPerBlock; +  const uint64_t NumLoadsPerBlockForZeroCmp;    std::vector<BasicBlock *> LoadCmpBlocks;    BasicBlock *EndBlock;    PHINode *PhiRes; @@ -102,7 +102,7 @@ class MemCmpExpansion {    MemCmpExpansion(CallInst *CI, uint64_t Size,                    const TargetTransformInfo::MemCmpExpansionOptions &Options,                    unsigned MaxNumLoads, const bool IsUsedForZeroCmp, -                  unsigned NumLoadsPerBlock, const DataLayout &DL); +                  unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout);    
unsigned getNumBlocks();    uint64_t getNumLoads() const { return LoadSequence.size(); } @@ -122,12 +122,12 @@ MemCmpExpansion::MemCmpExpansion(      CallInst *const CI, uint64_t Size,      const TargetTransformInfo::MemCmpExpansionOptions &Options,      const unsigned MaxNumLoads, const bool IsUsedForZeroCmp, -    const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout) +    const unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout)      : CI(CI),        Size(Size),        MaxLoadSize(0),        NumLoadsNonOneByte(0), -      NumLoadsPerBlock(NumLoadsPerBlock), +      NumLoadsPerBlockForZeroCmp(MaxLoadsPerBlockForZeroCmp),        IsUsedForZeroCmp(IsUsedForZeroCmp),        DL(TheDataLayout),        Builder(CI) { @@ -171,8 +171,8 @@ MemCmpExpansion::MemCmpExpansion(  unsigned MemCmpExpansion::getNumBlocks() {    if (IsUsedForZeroCmp) -    return getNumLoads() / NumLoadsPerBlock + -           (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0); +    return getNumLoads() / NumLoadsPerBlockForZeroCmp + +           (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);    return getNumLoads();  } @@ -249,7 +249,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,    Value *Diff;    const unsigned NumLoads = -      std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock); +      std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);    // For a single-block expansion, start inserting before the memcmp call.    if (LoadCmpBlocks.empty()) @@ -519,8 +519,6 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {  /// A memcmp expansion that only has one block of load and compare can bypass  /// the compare, branch, and phi IR that is required in the general case.  Value *MemCmpExpansion::getMemCmpOneBlock() { -  assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); -    Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);    Value *Source1 = CI->getArgOperand(0);    Value *Source2 = CI->getArgOperand(1); @@ -566,11 +564,8 @@ Value *MemCmpExpansion::getMemCmpOneBlock() {  // This function expands the memcmp call into an inline expansion and returns  // the memcmp result.  Value *MemCmpExpansion::getMemCmpExpansion() { -  // A memcmp with zero-comparison with only one block of load and compare does -  // not need to set up any extra blocks. This case could be handled in the DAG, -  // but since we have all of the machinery to flexibly expand any memcpy here, -  // we choose to handle this case too to avoid fragmented lowering. -  if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) { +  // Create the basic block framework for a multi-block expansion. +  if (getNumBlocks() != 1) {      BasicBlock *StartBlock = CI->getParent();      EndBlock = StartBlock->splitBasicBlock(CI, "endblock");      setupEndBlockPHINodes(); @@ -596,8 +591,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {      return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()                                 : getMemCmpExpansionZeroCase(); -  // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). 
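getNumBlocks() above is just a ceiling division: for an equality-only memcmp the loads are batched NumLoadsPerBlockForZeroCmp at a time, while the general (ordering) case needs one block per load pair. A worked sketch with made-up load counts:

    #include <cstdio>

    static unsigned numBlocks(unsigned NumLoads, unsigned LoadsPerBlock,
                              bool IsUsedForZeroCmp) {
      if (IsUsedForZeroCmp) // ceil(NumLoads / LoadsPerBlock)
        return NumLoads / LoadsPerBlock +
               (NumLoads % LoadsPerBlock != 0 ? 1 : 0);
      return NumLoads; // one load/compare pair per block
    }

    int main() {
      // e.g. memcmp(a, b, 24) == 0 expanded with 8-byte loads -> 3 loads.
      std::printf("zero-cmp, 3 loads, 2 per block -> %u blocks\n",
                  numBlocks(3, 2, true));  // ceil(3/2) = 2
      std::printf("ordering cmp, 3 loads -> %u blocks\n",
                  numBlocks(3, 2, false)); // 3
    }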
-  if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock(); +  if (getNumBlocks() == 1) +    return getMemCmpOneBlock();    for (unsigned I = 0; I < getNumBlocks(); ++I) {      emitLoadCompareBlock(I); @@ -709,8 +704,12 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,    const unsigned MaxNumLoads =        TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize()); +  unsigned NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences() +                                  ? MemCmpEqZeroNumLoadsPerBlock +                                  : TLI->getMemcmpEqZeroLoadsPerBlock(); +    MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads, -                            IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL); +                            IsUsedForZeroCmp, NumLoadsPerBlock, *DL);    // Don't expand if this will require more loads than desired by the target.    if (Expansion.getNumLoads() == 0) { diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 6ef97d6dd5ec..bc747fc610f8 100644 --- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -93,11 +93,11 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {    assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&           "Inserted value must be in a physical register"); -  DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); +  LLVM_DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);    if (MI->allDefsAreDead()) {      MI->setDesc(TII->get(TargetOpcode::KILL)); -    DEBUG(dbgs() << "subreg: replaced by: " << *MI); +    LLVM_DEBUG(dbgs() << "subreg: replaced by: " << *MI);      return true;    } @@ -110,10 +110,10 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {        MI->setDesc(TII->get(TargetOpcode::KILL));        MI->RemoveOperand(3);     // SubIdx        MI->RemoveOperand(1);     // Imm -      DEBUG(dbgs() << "subreg: replace by: " << *MI); +      LLVM_DEBUG(dbgs() << "subreg: replace by: " << *MI);        return true;      } -    DEBUG(dbgs() << "subreg: eliminated!"); +    LLVM_DEBUG(dbgs() << "subreg: eliminated!");    } else {      TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,                       MI->getOperand(2).isKill()); @@ -122,10 +122,10 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {      MachineBasicBlock::iterator CopyMI = MI;      --CopyMI;      CopyMI->addRegisterDefined(DstReg); -    DEBUG(dbgs() << "subreg: " << *CopyMI); +    LLVM_DEBUG(dbgs() << "subreg: " << *CopyMI);    } -  DEBUG(dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << '\n');    MBB->erase(MI);    return true;  } @@ -133,9 +133,9 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {  bool ExpandPostRA::LowerCopy(MachineInstr *MI) {    if (MI->allDefsAreDead()) { -    DEBUG(dbgs() << "dead copy: " << *MI); +    LLVM_DEBUG(dbgs() << "dead copy: " << *MI);      MI->setDesc(TII->get(TargetOpcode::KILL)); -    DEBUG(dbgs() << "replaced by: " << *MI); +    LLVM_DEBUG(dbgs() << "replaced by: " << *MI);      return true;    } @@ -144,14 +144,15 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {    bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg());    if (IdentityCopy || SrcMO.isUndef()) { -    DEBUG(dbgs() << (IdentityCopy ? "identity copy: " : "undef copy:    ") << *MI); +    LLVM_DEBUG(dbgs() << (IdentityCopy ? 
"identity copy: " : "undef copy:    ") +                      << *MI);      // No need to insert an identity copy instruction, but replace with a KILL      // if liveness is changed.      if (SrcMO.isUndef() || MI->getNumOperands() > 2) {        // We must make sure the super-register gets killed. Replace the        // instruction with KILL.        MI->setDesc(TII->get(TargetOpcode::KILL)); -      DEBUG(dbgs() << "replaced by:   " << *MI); +      LLVM_DEBUG(dbgs() << "replaced by:   " << *MI);        return true;      }      // Vanilla identity copy. @@ -159,13 +160,13 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {      return true;    } -  DEBUG(dbgs() << "real copy:   " << *MI); +  LLVM_DEBUG(dbgs() << "real copy:   " << *MI);    TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),                     DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());    if (MI->getNumOperands() > 2)      TransferImplicitOperands(MI); -  DEBUG({ +  LLVM_DEBUG({      MachineBasicBlock::iterator dMI = MI;      dbgs() << "replaced by: " << *(--dMI);    }); @@ -177,9 +178,9 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {  /// copies.  ///  bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { -  DEBUG(dbgs() << "Machine Function\n" -               << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" -               << "********** Function: " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "Machine Function\n" +                    << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" +                    << "********** Function: " << MF.getName() << '\n');    TRI = MF.getSubtarget().getRegisterInfo();    TII = MF.getSubtarget().getInstrInfo(); diff --git a/contrib/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm/lib/CodeGen/ExpandReductions.cpp index abf487a4f198..7552ba8cd85d 100644 --- a/contrib/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandReductions.cpp @@ -78,13 +78,15 @@ RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {  bool expandReductions(Function &F, const TargetTransformInfo *TTI) {    bool Changed = false; -  SmallVector<IntrinsicInst*, 4> Worklist; +  SmallVector<IntrinsicInst *, 4> Worklist;    for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)      if (auto II = dyn_cast<IntrinsicInst>(&*I))        Worklist.push_back(II);    for (auto *II : Worklist) {      IRBuilder<> Builder(II); +    bool IsOrdered = false; +    Value *Acc = nullptr;      Value *Vec = nullptr;      auto ID = II->getIntrinsicID();      auto MRK = RecurrenceDescriptor::MRK_Invalid; @@ -92,11 +94,10 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {      case Intrinsic::experimental_vector_reduce_fadd:      case Intrinsic::experimental_vector_reduce_fmul:        // FMFs must be attached to the call, otherwise it's an ordered reduction -      // and it can't be handled by generating this shuffle sequence. -      // TODO: Implement scalarization of ordered reductions here for targets -      // without native support. +      // and it can't be handled by generating a shuffle sequence.        
if (!II->getFastMathFlags().isFast()) -        continue; +        IsOrdered = true; +      Acc = II->getArgOperand(0);        Vec = II->getArgOperand(1);        break;      case Intrinsic::experimental_vector_reduce_add: @@ -118,7 +119,9 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {      }      if (!TTI->shouldExpandReduction(II))        continue; -    auto Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); +    Value *Rdx = +        IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK) +                  : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);      II->replaceAllUsesWith(Rdx);      II->eraseFromParent();      Changed = true; diff --git a/contrib/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm/lib/CodeGen/FaultMaps.cpp index 2924b011e0c1..361558a0e562 100644 --- a/contrib/llvm/lib/CodeGen/FaultMaps.cpp +++ b/contrib/llvm/lib/CodeGen/FaultMaps.cpp @@ -62,17 +62,17 @@ void FaultMaps::serializeToFaultMapSection() {    // Emit a dummy symbol to force section inclusion.    OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps"))); -  DEBUG(dbgs() << "********** Fault Map Output **********\n"); +  LLVM_DEBUG(dbgs() << "********** Fault Map Output **********\n");    // Header    OS.EmitIntValue(FaultMapVersion, 1); // Version.    OS.EmitIntValue(0, 1);               // Reserved.    OS.EmitIntValue(0, 2);               // Reserved. -  DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n"); +  LLVM_DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n");    OS.EmitIntValue(FunctionInfos.size(), 4); -  DEBUG(dbgs() << WFMP << "functions:\n"); +  LLVM_DEBUG(dbgs() << WFMP << "functions:\n");    for (const auto &FFI : FunctionInfos)      emitFunctionInfo(FFI.first, FFI.second); @@ -82,25 +82,25 @@ void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel,                                   const FunctionFaultInfos &FFI) {    MCStreamer &OS = *AP.OutStreamer; -  DEBUG(dbgs() << WFMP << "  function addr: " << *FnLabel << "\n"); +  LLVM_DEBUG(dbgs() << WFMP << "  function addr: " << *FnLabel << "\n");    OS.EmitSymbolValue(FnLabel, 8); -  DEBUG(dbgs() << WFMP << "  #faulting PCs: " << FFI.size() << "\n"); +  LLVM_DEBUG(dbgs() << WFMP << "  #faulting PCs: " << FFI.size() << "\n");    OS.EmitIntValue(FFI.size(), 4);    OS.EmitIntValue(0, 4); // Reserved    for (auto &Fault : FFI) { -    DEBUG(dbgs() << WFMP << "    fault type: " -          << faultTypeToString(Fault.Kind) << "\n"); +    LLVM_DEBUG(dbgs() << WFMP << "    fault type: " +                      << faultTypeToString(Fault.Kind) << "\n");      OS.EmitIntValue(Fault.Kind, 4); -    DEBUG(dbgs() << WFMP << "    faulting PC offset: " -          << *Fault.FaultingOffsetExpr << "\n"); +    LLVM_DEBUG(dbgs() << WFMP << "    faulting PC offset: " +                      << *Fault.FaultingOffsetExpr << "\n");      OS.EmitValue(Fault.FaultingOffsetExpr, 4); -    DEBUG(dbgs() << WFMP << "    fault handler PC offset: " -          << *Fault.HandlerOffsetExpr << "\n"); +    LLVM_DEBUG(dbgs() << WFMP << "    fault handler PC offset: " +                      << *Fault.HandlerOffsetExpr << "\n");      OS.EmitValue(Fault.HandlerOffsetExpr, 4);    }  } diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp index 9c71b18619a1..581cd423f2d4 100644 --- a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp +++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp @@ -41,8 +41,11 @@ INITIALIZE_PASS(FuncletLayout, DEBUG_TYPE,         
         "Contiguously Lay Out Funclets", false, false)  bool FuncletLayout::runOnMachineFunction(MachineFunction &F) { +  // Even though this gets information from getEHScopeMembership(), this pass is +  // only necessary for funclet-based EH personalities, in which these EH scopes +  // are outlined at the end.    DenseMap<const MachineBasicBlock *, int> FuncletMembership = -      getFuncletMembership(F); +      getEHScopeMembership(F);    if (FuncletMembership.empty())      return false; diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp index 4361d8b248c8..31ddeadbd97a 100644 --- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp @@ -38,7 +38,7 @@ namespace {  /// directed by the GCStrategy. It also performs automatic root initialization  /// and custom intrinsic lowering.  class LowerIntrinsics : public FunctionPass { -  bool PerformDefaultLowering(Function &F, GCStrategy &Coll); +  bool PerformDefaultLowering(Function &F, GCStrategy &S);  public:    static char ID; @@ -61,7 +61,7 @@ class GCMachineCodeAnalysis : public MachineFunctionPass {    const TargetInstrInfo *TII;    void FindSafePoints(MachineFunction &MF); -  void VisitCallPoint(MachineBasicBlock::iterator MI); +  void VisitCallPoint(MachineBasicBlock::iterator CI);    MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,                          const DebugLoc &DL) const; diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 114c068749eb..07de31bec660 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -38,6 +38,9 @@ bool CallLowering::lowerCall(      ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},                      i < NumFixedArgs};      setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS); +    // We don't currently support swifterror or swiftself args. +    if (OrigArg.Flags.isSwiftError() || OrigArg.Flags.isSwiftSelf()) +      return false;      OrigArgs.push_back(OrigArg);      ++i;    } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp new file mode 100644 index 000000000000..0bc5b87de150 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -0,0 +1,81 @@ +//===-- lib/CodeGen/GlobalISel/GICombiner.cpp -----------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file constains common code to combine machine functions at generic +// level. 
+//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/GlobalISel/GISelWorkList.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "gi-combiner" + +using namespace llvm; + +Combiner::Combiner(CombinerInfo &Info, const TargetPassConfig *TPC) +    : CInfo(Info), TPC(TPC) { +  (void)this->TPC; // FIXME: Remove when used. +} + +bool Combiner::combineMachineInstrs(MachineFunction &MF) { +  // If the ISel pipeline failed, do not bother running this pass. +  // FIXME: Should this be here or in individual combiner passes. +  if (MF.getProperties().hasProperty( +          MachineFunctionProperties::Property::FailedISel)) +    return false; + +  MRI = &MF.getRegInfo(); +  Builder.setMF(MF); + +  LLVM_DEBUG(dbgs() << "Generic MI Combiner for: " << MF.getName() << '\n'); + +  MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); + +  bool MFChanged = false; +  bool Changed; + +  do { +    // Collect all instructions. Do a post order traversal for basic blocks and +    // insert with list bottom up, so while we pop_back_val, we'll traverse top +    // down RPOT. +    Changed = false; +    GISelWorkList<512> WorkList; +    for (MachineBasicBlock *MBB : post_order(&MF)) { +      if (MBB->empty()) +        continue; +      for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) { +        MachineInstr *CurMI = &*MII; +        ++MII; +        // Erase dead insts before even adding to the list. +        if (isTriviallyDead(*CurMI, *MRI)) { +          LLVM_DEBUG(dbgs() << *CurMI << "Is dead; erasing.\n"); +          CurMI->eraseFromParentAndMarkDBGValuesForRemoval(); +          continue; +        } +        WorkList.insert(CurMI); +      } +    } +    // Main Loop. Process the instructions here. +    while (!WorkList.empty()) { +      MachineInstr *CurrInst = WorkList.pop_back_val(); +      LLVM_DEBUG(dbgs() << "Try combining " << *CurrInst << "\n";); +      Changed |= CInfo.combine(*CurrInst, Builder); +    } +    MFChanged |= Changed; +  } while (Changed); + +  return MFChanged; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp new file mode 100644 index 000000000000..44e904a6391b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -0,0 +1,41 @@ +//== ---lib/CodeGen/GlobalISel/GICombinerHelper.cpp --------------------- == // +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
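Combiner::combineMachineInstrs above fills its worklist in post order and then pops from the back, which yields a reverse-post-order (top-down) visit without building a separate RPO container. A toy equivalent over an explicit graph, with a hand-rolled DFS in place of llvm::post_order and block IDs in place of instructions (the real loop additionally inserts each block's instructions bottom-up):

    #include <cstdio>
    #include <vector>

    static void postOrder(int N, const std::vector<std::vector<int>> &Succs,
                          std::vector<bool> &Seen, std::vector<int> &Out) {
      Seen[N] = true;
      for (int S : Succs[N])
        if (!Seen[S])
          postOrder(S, Succs, Seen, Out);
      Out.push_back(N); // emitted after all successors: post order
    }

    int main() {
      // 0 -> 1 -> 3 and 0 -> 2 -> 3 (a simple diamond).
      std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {}};
      std::vector<bool> Seen(4, false);
      std::vector<int> WorkList;
      postOrder(0, Succs, Seen, WorkList); // e.g. 3 1 2 0

      // Popping from the back visits 0 first: reverse post order.
      while (!WorkList.empty()) {
        std::printf("visit %d\n", WorkList.back());
        WorkList.pop_back();
      }
    }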
+// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#define DEBUG_TYPE "gi-combine" + +using namespace llvm; + +CombinerHelper::CombinerHelper(MachineIRBuilder &B) : +  Builder(B), MRI(Builder.getMF().getRegInfo()) {} + +bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { +  if (MI.getOpcode() != TargetOpcode::COPY) +    return false; +  unsigned DstReg = MI.getOperand(0).getReg(); +  unsigned SrcReg = MI.getOperand(1).getReg(); +  LLT DstTy = MRI.getType(DstReg); +  LLT SrcTy = MRI.getType(SrcReg); +  // Simple Copy Propagation. +  // a(sx) = COPY b(sx) -> Replace all uses of a with b. +  if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) { +    MI.eraseFromParent(); +    MRI.replaceRegWith(DstReg, SrcReg); +    return true; +  } +  return false; +} + +bool CombinerHelper::tryCombine(MachineInstr &MI) { +  return tryCombineCopy(MI); +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index a329a71e2c95..bafb7a05536d 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -26,6 +26,7 @@  #include "llvm/CodeGen/MachineMemOperand.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackProtector.h"  #include "llvm/CodeGen/TargetFrameLowering.h"  #include "llvm/CodeGen/TargetLowering.h"  #include "llvm/CodeGen/TargetPassConfig.h" @@ -102,37 +103,103 @@ IRTranslator::IRTranslator() : MachineFunctionPass(ID) {  }  void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.addRequired<StackProtector>();    AU.addRequired<TargetPassConfig>(); +  getSelectionDAGFallbackAnalysisUsage(AU);    MachineFunctionPass::getAnalysisUsage(AU);  } -unsigned IRTranslator::getOrCreateVReg(const Value &Val) { -  unsigned &ValReg = ValToVReg[&Val]; +static void computeValueLLTs(const DataLayout &DL, Type &Ty, +                             SmallVectorImpl<LLT> &ValueTys, +                             SmallVectorImpl<uint64_t> *Offsets = nullptr, +                             uint64_t StartingOffset = 0) { +  // Given a struct type, recursively traverse the elements. +  if (StructType *STy = dyn_cast<StructType>(&Ty)) { +    const StructLayout *SL = DL.getStructLayout(STy); +    for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) +      computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets, +                       StartingOffset + SL->getElementOffset(I)); +    return; +  } +  // Given an array type, recursively traverse the elements. +  if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) { +    Type *EltTy = ATy->getElementType(); +    uint64_t EltSize = DL.getTypeAllocSize(EltTy); +    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) +      computeValueLLTs(DL, *EltTy, ValueTys, Offsets, +                       StartingOffset + i * EltSize); +    return; +  } +  // Interpret void as zero return values. +  if (Ty.isVoidTy()) +    return; +  // Base case: we can get an LLT for this LLVM IR type. 
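computeValueLLTs flattens an aggregate into leaf types plus bit offsets, mirroring SelectionDAG's ComputeValueVTs. A freestanding sketch of the same recursion over a toy type description (fixed scalar sizes and no padding, which is a simplification relative to DataLayout):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Toy stand-in for llvm::Type: a scalar of N bytes, or a struct.
    struct Ty {
      unsigned ScalarBytes = 0;     // nonzero for scalars
      std::vector<const Ty *> Elts; // nonempty for structs
    };

    static unsigned sizeBytes(const Ty &T) {
      if (T.ScalarBytes)
        return T.ScalarBytes;
      unsigned Size = 0;
      for (const Ty *E : T.Elts)
        Size += sizeBytes(*E); // no padding in this toy layout
      return Size;
    }

    static void computeValueTys(const Ty &T, std::vector<unsigned> &Sizes,
                                std::vector<std::uint64_t> &Offsets,
                                std::uint64_t StartingOffset = 0) {
      if (!T.Elts.empty()) { // struct: recurse into the elements
        std::uint64_t Off = StartingOffset;
        for (const Ty *E : T.Elts) {
          computeValueTys(*E, Sizes, Offsets, Off);
          Off += sizeBytes(*E);
        }
        return;
      }
      Sizes.push_back(T.ScalarBytes);        // base case: one leaf value
      Offsets.push_back(StartingOffset * 8); // offsets are kept in bits
    }

    int main() {
      Ty I32{4}, I64{8};
      Ty Pair{0, {&I32, &I64}}; // { i32, i64 }, ignoring padding
      std::vector<unsigned> Sizes;
      std::vector<std::uint64_t> Offsets;
      computeValueTys(Pair, Sizes, Offsets);
      for (std::size_t I = 0; I < Sizes.size(); ++I)
        std::printf("leaf %zu: %u bytes at bit offset %llu\n", I, Sizes[I],
                    (unsigned long long)Offsets[I]);
    }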
+  ValueTys.push_back(getLLTForType(Ty, DL)); +  if (Offsets != nullptr) +    Offsets->push_back(StartingOffset * 8); +} + +IRTranslator::ValueToVRegInfo::VRegListT & +IRTranslator::allocateVRegs(const Value &Val) { +  assert(!VMap.contains(Val) && "Value already allocated in VMap"); +  auto *Regs = VMap.getVRegs(Val); +  auto *Offsets = VMap.getOffsets(Val); +  SmallVector<LLT, 4> SplitTys; +  computeValueLLTs(*DL, *Val.getType(), SplitTys, +                   Offsets->empty() ? Offsets : nullptr); +  for (unsigned i = 0; i < SplitTys.size(); ++i) +    Regs->push_back(0); +  return *Regs; +} + +ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) { +  auto VRegsIt = VMap.findVRegs(Val); +  if (VRegsIt != VMap.vregs_end()) +    return *VRegsIt->second; -  if (ValReg) -    return ValReg; +  if (Val.getType()->isVoidTy()) +    return *VMap.getVRegs(Val); + +  // Create entry for this type. +  auto *VRegs = VMap.getVRegs(Val); +  auto *Offsets = VMap.getOffsets(Val); -  // Fill ValRegsSequence with the sequence of registers -  // we need to concat together to produce the value.    assert(Val.getType()->isSized() &&           "Don't know how to create an empty vreg"); -  unsigned VReg = -      MRI->createGenericVirtualRegister(getLLTForType(*Val.getType(), *DL)); -  ValReg = VReg; -  if (auto CV = dyn_cast<Constant>(&Val)) { -    bool Success = translate(*CV, VReg); +  SmallVector<LLT, 4> SplitTys; +  computeValueLLTs(*DL, *Val.getType(), SplitTys, +                   Offsets->empty() ? Offsets : nullptr); + +  if (!isa<Constant>(Val)) { +    for (auto Ty : SplitTys) +      VRegs->push_back(MRI->createGenericVirtualRegister(Ty)); +    return *VRegs; +  } + +  if (Val.getType()->isAggregateType()) { +    // UndefValue, ConstantAggregateZero +    auto &C = cast<Constant>(Val); +    unsigned Idx = 0; +    while (auto Elt = C.getAggregateElement(Idx++)) { +      auto EltRegs = getOrCreateVRegs(*Elt); +      std::copy(EltRegs.begin(), EltRegs.end(), std::back_inserter(*VRegs)); +    } +  } else { +    assert(SplitTys.size() == 1 && "unexpectedly split LLT"); +    VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0])); +    bool Success = translate(cast<Constant>(Val), VRegs->front());      if (!Success) {        OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",                                   MF->getFunction().getSubprogram(),                                   &MF->getFunction().getEntryBlock());        R << "unable to translate constant: " << ore::NV("Type", Val.getType());        reportTranslationError(*MF, *TPC, *ORE, R); -      return VReg; +      return *VRegs;      }    } -  return VReg; +  return *VRegs;  }  int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { @@ -164,6 +231,20 @@ unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {    } else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {      Alignment = LI->getAlignment();      ValTy = LI->getType(); +  } else if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) { +    // TODO(PR27168): This instruction has no alignment attribute, but unlike +    // the default alignment for load/store, the default here is to assume +    // it has NATURAL alignment, not DataLayout-specified alignment. 
+    const DataLayout &DL = AI->getModule()->getDataLayout(); +    Alignment = DL.getTypeStoreSize(AI->getCompareOperand()->getType()); +    ValTy = AI->getCompareOperand()->getType(); +  } else if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) { +    // TODO(PR27168): This instruction has no alignment attribute, but unlike +    // the default alignment for load/store, the default here is to assume +    // it has NATURAL alignment, not DataLayout-specified alignment. +    const DataLayout &DL = AI->getModule()->getDataLayout(); +    Alignment = DL.getTypeStoreSize(AI->getValOperand()->getType()); +    ValTy = AI->getType();    } else {      OptimizationRemarkMissed R("gisel-irtranslator", "", &I);      R << "unable to translate memop: " << ore::NV("Opcode", &I); @@ -243,7 +324,11 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {    // The target may mess up with the insertion point, but    // this is not important as a return is the last instruction    // of the block anyway. -  return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret)); + +  // FIXME: this interface should simplify when CallLowering gets adapted to +  // multiple VRegs per Value. +  unsigned VReg = Ret ? packRegs(*Ret, MIRBuilder) : 0; +  return CLI->lowerReturn(MIRBuilder, Ret, VReg);  }  bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) { @@ -342,15 +427,23 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {    if (DL->getTypeStoreSize(LI.getType()) == 0)      return true; -  unsigned Res = getOrCreateVReg(LI); -  unsigned Addr = getOrCreateVReg(*LI.getPointerOperand()); +  ArrayRef<unsigned> Regs = getOrCreateVRegs(LI); +  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI); +  unsigned Base = getOrCreateVReg(*LI.getPointerOperand()); + +  for (unsigned i = 0; i < Regs.size(); ++i) { +    unsigned Addr = 0; +    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8); + +    MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8); +    unsigned BaseAlign = getMemOpAlignment(LI); +    auto MMO = MF->getMachineMemOperand( +        Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8, +        MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr, +        LI.getSyncScopeID(), LI.getOrdering()); +    MIRBuilder.buildLoad(Regs[i], Addr, *MMO); +  } -  MIRBuilder.buildLoad( -      Res, Addr, -      *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()), -                                Flags, DL->getTypeStoreSize(LI.getType()), -                                getMemOpAlignment(LI), AAMDNodes(), nullptr, -                                LI.getSyncScopeID(), LI.getOrdering()));    return true;  } @@ -363,50 +456,61 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {    if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)      return true; -  unsigned Val = getOrCreateVReg(*SI.getValueOperand()); -  unsigned Addr = getOrCreateVReg(*SI.getPointerOperand()); - -  MIRBuilder.buildStore( -      Val, Addr, -      *MF->getMachineMemOperand( -          MachinePointerInfo(SI.getPointerOperand()), Flags, -          DL->getTypeStoreSize(SI.getValueOperand()->getType()), -          getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSyncScopeID(), -          SI.getOrdering())); +  ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand()); +  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand()); +  unsigned Base = 
getOrCreateVReg(*SI.getPointerOperand()); + +  for (unsigned i = 0; i < Vals.size(); ++i) { +    unsigned Addr = 0; +    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8); + +    MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8); +    unsigned BaseAlign = getMemOpAlignment(SI); +    auto MMO = MF->getMachineMemOperand( +        Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8, +        MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr, +        SI.getSyncScopeID(), SI.getOrdering()); +    MIRBuilder.buildStore(Vals[i], Addr, *MMO); +  }    return true;  } -bool IRTranslator::translateExtractValue(const User &U, -                                         MachineIRBuilder &MIRBuilder) { +static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {    const Value *Src = U.getOperand(0);    Type *Int32Ty = Type::getInt32Ty(U.getContext()); -  SmallVector<Value *, 1> Indices; - -  // If Src is a single element ConstantStruct, translate extractvalue -  // to that element to avoid inserting a cast instruction. -  if (auto CS = dyn_cast<ConstantStruct>(Src)) -    if (CS->getNumOperands() == 1) { -      unsigned Res = getOrCreateVReg(*CS->getOperand(0)); -      ValToVReg[&U] = Res; -      return true; -    }    // getIndexedOffsetInType is designed for GEPs, so the first index is the    // usual array element rather than looking into the actual aggregate. +  SmallVector<Value *, 1> Indices;    Indices.push_back(ConstantInt::get(Int32Ty, 0));    if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {      for (auto Idx : EVI->indices())        Indices.push_back(ConstantInt::get(Int32Ty, Idx)); +  } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) { +    for (auto Idx : IVI->indices()) +      Indices.push_back(ConstantInt::get(Int32Ty, Idx));    } else {      for (unsigned i = 1; i < U.getNumOperands(); ++i)        Indices.push_back(U.getOperand(i));    } -  uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices); +  return 8 * static_cast<uint64_t>( +                 DL.getIndexedOffsetInType(Src->getType(), Indices)); +} -  unsigned Res = getOrCreateVReg(U); -  MIRBuilder.buildExtract(Res, getOrCreateVReg(*Src), Offset); +bool IRTranslator::translateExtractValue(const User &U, +                                         MachineIRBuilder &MIRBuilder) { +  const Value *Src = U.getOperand(0); +  uint64_t Offset = getOffsetFromIndices(U, *DL); +  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src); +  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src); +  unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) - +                 Offsets.begin(); +  auto &DstRegs = allocateVRegs(U); + +  for (unsigned i = 0; i < DstRegs.size(); ++i) +    DstRegs[i] = SrcRegs[Idx++];    return true;  } @@ -414,37 +518,33 @@ bool IRTranslator::translateExtractValue(const User &U,  bool IRTranslator::translateInsertValue(const User &U,                                          MachineIRBuilder &MIRBuilder) {    const Value *Src = U.getOperand(0); -  Type *Int32Ty = Type::getInt32Ty(U.getContext()); -  SmallVector<Value *, 1> Indices; - -  // getIndexedOffsetInType is designed for GEPs, so the first index is the -  // usual array element rather than looking into the actual aggregate. 
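Splitting one store into per-element stores has the alignment consequence handled above: a piece at byte offset K can only rely on MinAlign(BaseAlign, K), the largest power of two dividing both. MinAlign reduces to the lowest set bit of the OR; a quick check, reimplementing the published llvm::MinAlign definition with made-up offsets:

    #include <cstdint>
    #include <cstdio>

    // Largest power of two dividing both A and B: lowest set bit of (A | B).
    static std::uint64_t minAlign(std::uint64_t A, std::uint64_t B) {
      return (A | B) & (1 + ~(A | B));
    }

    int main() {
      std::uint64_t BaseAlign = 8; // the original access's alignment
      for (std::uint64_t Offset : {0, 4, 6})
        std::printf("piece at +%llu -> align %llu\n",
                    (unsigned long long)Offset,
                    (unsigned long long)minAlign(BaseAlign, Offset));
      // +0 -> 8, +4 -> 4, +6 -> 2
    }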
-  Indices.push_back(ConstantInt::get(Int32Ty, 0)); - -  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) { -    for (auto Idx : IVI->indices()) -      Indices.push_back(ConstantInt::get(Int32Ty, Idx)); -  } else { -    for (unsigned i = 2; i < U.getNumOperands(); ++i) -      Indices.push_back(U.getOperand(i)); +  uint64_t Offset = getOffsetFromIndices(U, *DL); +  auto &DstRegs = allocateVRegs(U); +  ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U); +  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src); +  ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1)); +  auto InsertedIt = InsertedRegs.begin(); + +  for (unsigned i = 0; i < DstRegs.size(); ++i) { +    if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end()) +      DstRegs[i] = *InsertedIt++; +    else +      DstRegs[i] = SrcRegs[i];    } -  uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices); - -  unsigned Res = getOrCreateVReg(U); -  unsigned Inserted = getOrCreateVReg(*U.getOperand(1)); -  MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), Inserted, Offset); -    return true;  }  bool IRTranslator::translateSelect(const User &U,                                     MachineIRBuilder &MIRBuilder) { -  unsigned Res = getOrCreateVReg(U);    unsigned Tst = getOrCreateVReg(*U.getOperand(0)); -  unsigned Op0 = getOrCreateVReg(*U.getOperand(1)); -  unsigned Op1 = getOrCreateVReg(*U.getOperand(2)); -  MIRBuilder.buildSelect(Res, Tst, Op0, Op1); +  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U); +  ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1)); +  ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2)); + +  for (unsigned i = 0; i < ResRegs.size(); ++i) +    MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]); +    return true;  } @@ -453,15 +553,16 @@ bool IRTranslator::translateBitCast(const User &U,    // If we're bitcasting to the source type, we can reuse the source vreg.    if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==        getLLTForType(*U.getType(), *DL)) { -    // Get the source vreg now, to avoid invalidating ValToVReg.      unsigned SrcReg = getOrCreateVReg(*U.getOperand(0)); -    unsigned &Reg = ValToVReg[&U]; +    auto &Regs = *VMap.getVRegs(U);      // If we already assigned a vreg for this bitcast, we can't change that.      // Emit a copy to satisfy the users we already emitted. -    if (Reg) -      MIRBuilder.buildCopy(Reg, SrcReg); -    else -      Reg = SrcReg; +    if (!Regs.empty()) +      MIRBuilder.buildCopy(Regs[0], SrcReg); +    else { +      Regs.push_back(SrcReg); +      VMap.getOffsets(U)->push_back(0); +    }      return true;    }    return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder); @@ -516,10 +617,6 @@ bool IRTranslator::translateGetElementPtr(const User &U,          Offset = 0;        } -      // N = N + Idx * ElementSize; -      unsigned ElementSizeReg = -          getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize)); -        unsigned IdxReg = getOrCreateVReg(*Idx);        if (MRI->getType(IdxReg) != OffsetTy) {          unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); @@ -527,11 +624,20 @@ bool IRTranslator::translateGetElementPtr(const User &U,          IdxReg = NewIdxReg;        } -      unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy); -      MIRBuilder.buildMul(OffsetReg, ElementSizeReg, IdxReg); +      // N = N + Idx * ElementSize; +      // Avoid doing it for ElementSize of 1. 
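The GEP translation above accumulates constant indices into a running offset and, for a variable index, emits Idx * ElementSize, with the hunk that follows skipping the multiply when the element size is 1. The arithmetic it builds as G_MUL/G_GEP chains boils down to the classic address computation; a plain-integer sketch with made-up values:

    #include <cstdint>
    #include <cstdio>

    int main() {
      std::uint64_t Base = 0x1000; // the pointer operand
      std::uint64_t Offset = 8;    // a constant field offset, folded early

      std::uint64_t Idx = 37;        // runtime index
      std::uint64_t ElementSize = 1; // e.g. an i8 array: no multiply needed
      std::uint64_t GepOffset = ElementSize != 1 ? Idx * ElementSize : Idx;

      std::printf("addr = 0x%llx\n",
                  (unsigned long long)(Base + Offset + GepOffset));
    }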
+      unsigned GepOffsetReg; +      if (ElementSize != 1) { +        unsigned ElementSizeReg = +            getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize)); + +        GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy); +        MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg); +      } else +        GepOffsetReg = IdxReg;        unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); -      MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg); +      MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg);        BaseReg = NewBaseReg;      }    } @@ -607,14 +713,10 @@ void IRTranslator::getStackGuard(unsigned DstReg,  bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,                                                MachineIRBuilder &MIRBuilder) { -  LLT Ty = getLLTForType(*CI.getOperand(0)->getType(), *DL); -  LLT s1 = LLT::scalar(1); -  unsigned Width = Ty.getSizeInBits(); -  unsigned Res = MRI->createGenericVirtualRegister(Ty); -  unsigned Overflow = MRI->createGenericVirtualRegister(s1); +  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);    auto MIB = MIRBuilder.buildInstr(Op) -                 .addDef(Res) -                 .addDef(Overflow) +                 .addDef(ResRegs[0]) +                 .addDef(ResRegs[1])                   .addUse(getOrCreateVReg(*CI.getOperand(0)))                   .addUse(getOrCreateVReg(*CI.getOperand(1))); @@ -624,7 +726,6 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,      MIB.addUse(Zero);    } -  MIRBuilder.buildSequence(getOrCreateVReg(CI), {Res, Overflow}, {0, Width});    return true;  } @@ -647,7 +748,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,      const Value *Address = DI.getAddress();      if (!Address || isa<UndefValue>(Address)) { -      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); +      LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");        return true;      } @@ -741,6 +842,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,          .addDef(getOrCreateVReg(CI))          .addUse(getOrCreateVReg(*CI.getArgOperand(0)));      return true; +  case Intrinsic::fabs: +    MIRBuilder.buildInstr(TargetOpcode::G_FABS) +        .addDef(getOrCreateVReg(CI)) +        .addUse(getOrCreateVReg(*CI.getArgOperand(0))); +    return true;    case Intrinsic::fma:      MIRBuilder.buildInstr(TargetOpcode::G_FMA)          .addDef(getOrCreateVReg(CI)) @@ -748,6 +854,25 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,          .addUse(getOrCreateVReg(*CI.getArgOperand(1)))          .addUse(getOrCreateVReg(*CI.getArgOperand(2)));      return true; +  case Intrinsic::fmuladd: { +    const TargetMachine &TM = MF->getTarget(); +    const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); +    unsigned Dst = getOrCreateVReg(CI); +    unsigned Op0 = getOrCreateVReg(*CI.getArgOperand(0)); +    unsigned Op1 = getOrCreateVReg(*CI.getArgOperand(1)); +    unsigned Op2 = getOrCreateVReg(*CI.getArgOperand(2)); +    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && +        TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) { +      // TODO: Revisit this to see if we should move this part of the +      // lowering to the combiner. 
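Whether fmuladd becomes one G_FMA or a G_FMUL/G_FADD pair, as chosen just above, is observable: the fused form rounds once, the split form twice. A quick illustration using the C library's std::fma, with an arbitrary value whose square is inexact (compile with FP contraction off, e.g. -ffp-contract=off, so the compiler does not itself fuse the split expression):

    #include <cmath>
    #include <cstdio>

    int main() {
      double C = 1.0 + 1e-7; // any value whose square is inexact
      double R = C * C;      // the product, rounded once

      double Split = C * C - R;          // 0: same rounded product again
      double Fused = std::fma(C, C, -R); // rounding residual of C*C, nonzero

      std::printf("split = %g, fused = %g\n", Split, Fused);
      // A target that maps fmuladd to G_FMA computes the fused answer;
      // the G_FMUL + G_FADD fallback computes the split one.
    }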
+      MIRBuilder.buildInstr(TargetOpcode::G_FMA, Dst, Op0, Op1, Op2); +    } else { +      LLT Ty = getLLTForType(*CI.getType(), *DL); +      auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, Ty, Op0, Op1); +      MIRBuilder.buildInstr(TargetOpcode::G_FADD, Dst, FMul, Op2); +    } +    return true; +  }    case Intrinsic::memcpy:    case Intrinsic::memmove:    case Intrinsic::memset: @@ -807,6 +932,34 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI,    return true;  } +unsigned IRTranslator::packRegs(const Value &V, +                                  MachineIRBuilder &MIRBuilder) { +  ArrayRef<unsigned> Regs = getOrCreateVRegs(V); +  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V); +  LLT BigTy = getLLTForType(*V.getType(), *DL); + +  if (Regs.size() == 1) +    return Regs[0]; + +  unsigned Dst = MRI->createGenericVirtualRegister(BigTy); +  MIRBuilder.buildUndef(Dst); +  for (unsigned i = 0; i < Regs.size(); ++i) { +    unsigned NewDst = MRI->createGenericVirtualRegister(BigTy); +    MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]); +    Dst = NewDst; +  } +  return Dst; +} + +void IRTranslator::unpackRegs(const Value &V, unsigned Src, +                                MachineIRBuilder &MIRBuilder) { +  ArrayRef<unsigned> Regs = getOrCreateVRegs(V); +  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V); + +  for (unsigned i = 0; i < Regs.size(); ++i) +    MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]); +} +  bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {    const CallInst &CI = cast<CallInst>(U);    auto TII = MF->getTarget().getIntrinsicInfo(); @@ -826,16 +979,24 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {        ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));    } +  bool IsSplitType = valueIsSplit(CI);    if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) { -    unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI); +    unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister( +                                     getLLTForType(*CI.getType(), *DL)) +                               : getOrCreateVReg(CI); +      SmallVector<unsigned, 8> Args;      for (auto &Arg: CI.arg_operands()) -      Args.push_back(getOrCreateVReg(*Arg)); +      Args.push_back(packRegs(*Arg, MIRBuilder));      MF->getFrameInfo().setHasCalls(true); -    return CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() { +    bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {        return getOrCreateVReg(*CI.getCalledValue());      }); + +    if (IsSplitType) +      unpackRegs(CI, Res, MIRBuilder); +    return Success;    }    assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); @@ -843,7 +1004,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {    if (translateKnownIntrinsic(CI, ID, MIRBuilder))      return true; -  unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI); +  unsigned Res = 0; +  if (!CI.getType()->isVoidTy()) { +    if (IsSplitType) +      Res = +          MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL)); +    else +      Res = getOrCreateVReg(CI); +  }    MachineInstrBuilder MIB =        MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory()); @@ -851,9 +1019,12 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {      // Some intrinsics take metadata parameters. Reject them.      
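packRegs/unpackRegs above bridge between the new multi-register representation and interfaces that still expect a single value: pack inserts each piece at its bit offset into one wide value, unpack extracts the pieces again. The same round trip on a plain 64-bit integer, standing in for G_INSERT/G_EXTRACT on a toy { i16, i32 } split (insertBits/extractBits are ad-hoc helpers, not LLVM APIs):

    #include <cstdint>
    #include <cstdio>

    static std::uint64_t insertBits(std::uint64_t Big, std::uint64_t Piece,
                                    unsigned Offset, unsigned Width) {
      std::uint64_t Mask = ((Width < 64 ? (1ULL << Width) : 0) - 1) << Offset;
      return (Big & ~Mask) | ((Piece << Offset) & Mask);
    }

    static std::uint64_t extractBits(std::uint64_t Big, unsigned Offset,
                                     unsigned Width) {
      return (Big >> Offset) & ((Width < 64 ? (1ULL << Width) : 0) - 1);
    }

    int main() {
      // Two "registers" of a split value with their bit offsets and widths.
      std::uint64_t Regs[] = {0xBEEF, 0x12345678};
      unsigned Offsets[] = {0, 16};
      unsigned Widths[] = {16, 32};

      std::uint64_t Packed = 0; // packRegs: start empty, insert each piece
      for (int I = 0; I < 2; ++I)
        Packed = insertBits(Packed, Regs[I], Offsets[I], Widths[I]);

      for (int I = 0; I < 2; ++I) // unpackRegs: extract each piece again
        std::printf("piece %d = 0x%llx\n", I,
                    (unsigned long long)extractBits(Packed, Offsets[I],
                                                    Widths[I]));
    }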
if (isa<MetadataAsValue>(Arg))        return false; -    MIB.addUse(getOrCreateVReg(*Arg)); +    MIB.addUse(packRegs(*Arg, MIRBuilder));    } +  if (IsSplitType) +    unpackRegs(CI, Res, MIRBuilder); +    // Add a MachineMemOperand if it is a target mem intrinsic.    const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();    TargetLowering::IntrinsicInfo Info; @@ -897,15 +1068,18 @@ bool IRTranslator::translateInvoke(const User &U,    MCSymbol *BeginSymbol = Context.createTempSymbol();    MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); -  unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I); +  unsigned Res = +        MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL));    SmallVector<unsigned, 8> Args;    for (auto &Arg: I.arg_operands()) -    Args.push_back(getOrCreateVReg(*Arg)); +    Args.push_back(packRegs(*Arg, MIRBuilder));    if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,                        [&]() { return getOrCreateVReg(*I.getCalledValue()); }))      return false; +  unpackRegs(I, Res, MIRBuilder); +    MCSymbol *EndSymbol = Context.createTempSymbol();    MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol); @@ -964,27 +1138,18 @@ bool IRTranslator::translateLandingPad(const User &U,      return false;    MBB.addLiveIn(ExceptionReg); -  unsigned VReg = MRI->createGenericVirtualRegister(Tys[0]), -           Tmp = MRI->createGenericVirtualRegister(Ty); -  MIRBuilder.buildCopy(VReg, ExceptionReg); -  MIRBuilder.buildInsert(Tmp, Undef, VReg, 0); +  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP); +  MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);    unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);    if (!SelectorReg)      return false;    MBB.addLiveIn(SelectorReg); - -  // N.b. the exception selector register always has pointer type and may not -  // match the actual IR-level type in the landingpad so an extra cast is -  // needed.    unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);    MIRBuilder.buildCopy(PtrVReg, SelectorReg); +  MIRBuilder.buildCast(ResRegs[1], PtrVReg); -  VReg = MRI->createGenericVirtualRegister(Tys[1]); -  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT).addDef(VReg).addUse(PtrVReg); -  MIRBuilder.buildInsert(getOrCreateVReg(LP), Tmp, VReg, -                         Tys[0].getSizeInBits());    return true;  } @@ -992,6 +1157,9 @@ bool IRTranslator::translateAlloca(const User &U,                                     MachineIRBuilder &MIRBuilder) {    auto &AI = cast<AllocaInst>(U); +  if (AI.isSwiftError()) +    return false; +    if (AI.isStaticAlloca()) {      unsigned Res = getOrCreateVReg(AI);      int FI = getOrCreateFrameIndex(AI); @@ -999,6 +1167,10 @@ bool IRTranslator::translateAlloca(const User &U,      return true;    } +  // FIXME: support stack probing for Windows. +  if (MF->getTarget().getTargetTriple().isOSWindows()) +    return false; +    // Now we're in the harder dynamic case.    Type *Ty = AI.getAllocatedType();    unsigned Align = @@ -1070,9 +1242,16 @@ bool IRTranslator::translateInsertElement(const User &U,    // not a legal vector type in LLT.    
if (U.getType()->getVectorNumElements() == 1) {      unsigned Elt = getOrCreateVReg(*U.getOperand(1)); -    ValToVReg[&U] = Elt; +    auto &Regs = *VMap.getVRegs(U); +    if (Regs.empty()) { +      Regs.push_back(Elt); +      VMap.getOffsets(U)->push_back(0); +    } else { +      MIRBuilder.buildCopy(Regs[0], Elt); +    }      return true;    } +    unsigned Res = getOrCreateVReg(U);    unsigned Val = getOrCreateVReg(*U.getOperand(0));    unsigned Elt = getOrCreateVReg(*U.getOperand(1)); @@ -1087,7 +1266,13 @@ bool IRTranslator::translateExtractElement(const User &U,    // not a legal vector type in LLT.    if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {      unsigned Elt = getOrCreateVReg(*U.getOperand(0)); -    ValToVReg[&U] = Elt; +    auto &Regs = *VMap.getVRegs(U); +    if (Regs.empty()) { +      Regs.push_back(Elt); +      VMap.getOffsets(U)->push_back(0); +    } else { +      MIRBuilder.buildCopy(Regs[0], Elt); +    }      return true;    }    unsigned Res = getOrCreateVReg(U); @@ -1109,17 +1294,115 @@ bool IRTranslator::translateShuffleVector(const User &U,  bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {    const PHINode &PI = cast<PHINode>(U); -  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI); -  MIB.addDef(getOrCreateVReg(PI)); -  PendingPHIs.emplace_back(&PI, MIB.getInstr()); +  SmallVector<MachineInstr *, 4> Insts; +  for (auto Reg : getOrCreateVRegs(PI)) { +    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg); +    Insts.push_back(MIB.getInstr()); +  } + +  PendingPHIs.emplace_back(&PI, std::move(Insts)); +  return true; +} + +bool IRTranslator::translateAtomicCmpXchg(const User &U, +                                          MachineIRBuilder &MIRBuilder) { +  const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U); + +  if (I.isWeak()) +    return false; + +  auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile +                              : MachineMemOperand::MONone; +  Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + +  Type *ResType = I.getType(); +  Type *ValType = ResType->Type::getStructElementType(0); + +  auto Res = getOrCreateVRegs(I); +  unsigned OldValRes = Res[0]; +  unsigned SuccessRes = Res[1]; +  unsigned Addr = getOrCreateVReg(*I.getPointerOperand()); +  unsigned Cmp = getOrCreateVReg(*I.getCompareOperand()); +  unsigned NewVal = getOrCreateVReg(*I.getNewValOperand()); + +  MIRBuilder.buildAtomicCmpXchgWithSuccess( +      OldValRes, SuccessRes, Addr, Cmp, NewVal, +      *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), +                                Flags, DL->getTypeStoreSize(ValType), +                                getMemOpAlignment(I), AAMDNodes(), nullptr, +                                I.getSyncScopeID(), I.getSuccessOrdering(), +                                I.getFailureOrdering())); +  return true; +} + +bool IRTranslator::translateAtomicRMW(const User &U, +                                      MachineIRBuilder &MIRBuilder) { +  const AtomicRMWInst &I = cast<AtomicRMWInst>(U); + +  auto Flags = I.isVolatile() ? 
MachineMemOperand::MOVolatile +                              : MachineMemOperand::MONone; +  Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + +  Type *ResType = I.getType(); + +  unsigned Res = getOrCreateVReg(I); +  unsigned Addr = getOrCreateVReg(*I.getPointerOperand()); +  unsigned Val = getOrCreateVReg(*I.getValOperand()); + +  unsigned Opcode = 0; +  switch (I.getOperation()) { +  default: +    llvm_unreachable("Unknown atomicrmw op"); +    return false; +  case AtomicRMWInst::Xchg: +    Opcode = TargetOpcode::G_ATOMICRMW_XCHG; +    break; +  case AtomicRMWInst::Add: +    Opcode = TargetOpcode::G_ATOMICRMW_ADD; +    break; +  case AtomicRMWInst::Sub: +    Opcode = TargetOpcode::G_ATOMICRMW_SUB; +    break; +  case AtomicRMWInst::And: +    Opcode = TargetOpcode::G_ATOMICRMW_AND; +    break; +  case AtomicRMWInst::Nand: +    Opcode = TargetOpcode::G_ATOMICRMW_NAND; +    break; +  case AtomicRMWInst::Or: +    Opcode = TargetOpcode::G_ATOMICRMW_OR; +    break; +  case AtomicRMWInst::Xor: +    Opcode = TargetOpcode::G_ATOMICRMW_XOR; +    break; +  case AtomicRMWInst::Max: +    Opcode = TargetOpcode::G_ATOMICRMW_MAX; +    break; +  case AtomicRMWInst::Min: +    Opcode = TargetOpcode::G_ATOMICRMW_MIN; +    break; +  case AtomicRMWInst::UMax: +    Opcode = TargetOpcode::G_ATOMICRMW_UMAX; +    break; +  case AtomicRMWInst::UMin: +    Opcode = TargetOpcode::G_ATOMICRMW_UMIN; +    break; +  } + +  MIRBuilder.buildAtomicRMW( +      Opcode, Res, Addr, Val, +      *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), +                                Flags, DL->getTypeStoreSize(ResType), +                                getMemOpAlignment(I), AAMDNodes(), nullptr, +                                I.getSyncScopeID(), I.getOrdering()));    return true;  }  void IRTranslator::finishPendingPhis() { -  for (std::pair<const PHINode *, MachineInstr *> &Phi : PendingPHIs) { +  for (auto &Phi : PendingPHIs) {      const PHINode *PI = Phi.first; -    MachineInstrBuilder MIB(*MF, Phi.second); +    ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;      // All MachineBasicBlocks exist, add them to the PHI. 
We assume IRTranslator      // won't create extra control flow here, otherwise we need to find the @@ -1133,17 +1416,27 @@ void IRTranslator::finishPendingPhis() {          continue;        HandledPreds.insert(IRPred); -      unsigned ValReg = getOrCreateVReg(*PI->getIncomingValue(i)); +      ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));        for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) { -        assert(Pred->isSuccessor(MIB->getParent()) && +        assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) &&                 "incorrect CFG at MachineBasicBlock level"); -        MIB.addUse(ValReg); -        MIB.addMBB(Pred); +        for (unsigned j = 0; j < ValRegs.size(); ++j) { +          MachineInstrBuilder MIB(*MF, ComponentPHIs[j]); +          MIB.addUse(ValRegs[j]); +          MIB.addMBB(Pred); +        }        }      }    }  } +bool IRTranslator::valueIsSplit(const Value &V, +                                SmallVectorImpl<uint64_t> *Offsets) { +  SmallVector<LLT, 4> SplitTys; +  computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets); +  return SplitTys.size() > 1; +} +  bool IRTranslator::translate(const Instruction &Inst) {    CurBuilder.setDebugLoc(Inst.getDebugLoc());    switch(Inst.getOpcode()) { @@ -1162,9 +1455,15 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {      EntryBuilder.buildFConstant(Reg, *CF);    else if (isa<UndefValue>(C))      EntryBuilder.buildUndef(Reg); -  else if (isa<ConstantPointerNull>(C)) -    EntryBuilder.buildConstant(Reg, 0); -  else if (auto GV = dyn_cast<GlobalValue>(&C)) +  else if (isa<ConstantPointerNull>(C)) { +    // As we are trying to build a constant val of 0 into a pointer, +    // insert a cast to make them correct with respect to types. +    unsigned NullSize = DL->getTypeSizeInBits(C.getType()); +    auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize); +    auto *ZeroVal = ConstantInt::get(ZeroTy, 0); +    unsigned ZeroReg = getOrCreateVReg(*ZeroVal); +    EntryBuilder.buildCast(Reg, ZeroReg); +  } else if (auto GV = dyn_cast<GlobalValue>(&C))      EntryBuilder.buildGlobalValue(Reg, GV);    else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {      if (!CAZ->getType()->isVectorTy()) @@ -1196,23 +1495,6 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {      default:        return false;      } -  } else if (auto CS = dyn_cast<ConstantStruct>(&C)) { -    // Return the element if it is a single element ConstantStruct. -    if (CS->getNumOperands() == 1) { -      unsigned EltReg = getOrCreateVReg(*CS->getOperand(0)); -      EntryBuilder.buildCast(Reg, EltReg); -      return true; -    } -    SmallVector<unsigned, 4> Ops; -    SmallVector<uint64_t, 4> Indices; -    uint64_t Offset = 0; -    for (unsigned i = 0; i < CS->getNumOperands(); ++i) { -      unsigned OpReg = getOrCreateVReg(*CS->getOperand(i)); -      Ops.push_back(OpReg); -      Indices.push_back(Offset); -      Offset += MRI->getType(OpReg).getSizeInBits(); -    } -    EntryBuilder.buildSequence(Reg, Ops, Indices);    } else if (auto CV = dyn_cast<ConstantVector>(&C)) {      if (CV->getNumOperands() == 1)        return translate(*CV->getOperand(0), Reg); @@ -1231,7 +1513,7 @@ void IRTranslator::finalizeFunction() {    // Release the memory used by the different maps we    // needed during the translation.    PendingPHIs.clear(); -  ValToVReg.clear(); +  VMap.reset();    FrameIndices.clear();    MachinePreds.clear();    // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. 
Clear it @@ -1291,8 +1573,22 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {    for (const Argument &Arg: F.args()) {      if (DL->getTypeStoreSize(Arg.getType()) == 0)        continue; // Don't handle zero sized types. -    VRegArgs.push_back(getOrCreateVReg(Arg)); +    VRegArgs.push_back( +        MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));    } + +  // We don't currently support translating swifterror or swiftself functions. +  for (auto &Arg : F.args()) { +    if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) { +      OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", +                                 F.getSubprogram(), &F.getEntryBlock()); +      R << "unable to lower arguments due to swifterror/swiftself: " +        << ore::NV("Prototype", F.getType()); +      reportTranslationError(*MF, *TPC, *ORE, R); +      return false; +    } +  } +    if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {      OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",                                 F.getSubprogram(), &F.getEntryBlock()); @@ -1301,14 +1597,28 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {      return false;    } +  auto ArgIt = F.arg_begin(); +  for (auto &VArg : VRegArgs) { +    // If the argument is an unsplit scalar then don't use unpackRegs to avoid +    // creating redundant copies. +    if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) { +      auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt)); +      assert(VRegs.empty() && "VRegs already populated?"); +      VRegs.push_back(VArg); +    } else { +      unpackRegs(*ArgIt, VArg, EntryBuilder); +    } +    ArgIt++; +  } +    // And translate the function! -  for (const BasicBlock &BB: F) { +  for (const BasicBlock &BB : F) {      MachineBasicBlock &MBB = getMBB(BB);      // Set the insertion point of all the following translations to      // the end of this basic block.      CurBuilder.setMBB(MBB); -    for (const Instruction &Inst: BB) { +    for (const Instruction &Inst : BB) {        if (translate(Inst))          continue; @@ -1358,5 +1668,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {    assert(&MF->front() == &NewEntryBB &&           "New entry wasn't next in the list of basic block!"); +  // Initialize stack protector information. 
+  StackProtector &SP = getAnalysis<StackProtector>(); +  SP.copyToMachineFrameInfo(MF->getFrameInfo()); +    return false;  } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 422cc2219aa8..c83c791327e4 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -12,7 +12,6 @@  #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"  #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/ScopeExit.h"  #include "llvm/ADT/Twine.h"  #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"  #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" @@ -57,23 +56,17 @@ InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) {  void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {    AU.addRequired<TargetPassConfig>(); +  getSelectionDAGFallbackAnalysisUsage(AU);    MachineFunctionPass::getAnalysisUsage(AU);  }  bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { -  const MachineRegisterInfo &MRI = MF.getRegInfo(); - -  // No matter what happens, whether we successfully select the function or not, -  // nothing is going to use the vreg types after us.  Make sure they disappear. -  auto ClearVRegTypesOnReturn = -      make_scope_exit([&]() { MRI.getVRegToType().clear(); }); -    // If the ISel pipeline failed, do not bother running that pass.    if (MF.getProperties().hasProperty(            MachineFunctionProperties::Property::FailedISel))      return false; -  DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n');    const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();    const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); @@ -85,23 +78,18 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {    // FIXME: There are many other MF/MFI fields we need to initialize. +  MachineRegisterInfo &MRI = MF.getRegInfo();  #ifndef NDEBUG    // Check that our input is fully legal: we require the function to have the    // Legalized property, so it should be. -  // FIXME: This should be in the MachineVerifier, but it can't use the -  // LegalizerInfo as it's currently in the separate GlobalISel library. -  // The RegBankSelected property is already checked in the verifier. Note -  // that it has the same layering problem, but we only use inline methods so -  // end up not needing to link against the GlobalISel library. -  if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) -    for (MachineBasicBlock &MBB : MF) -      for (MachineInstr &MI : MBB) -        if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) { -          reportGISelFailure(MF, TPC, MORE, "gisel-select", -                             "instruction is not legal", MI); -          return false; -        } - +  // FIXME: This should be in the MachineVerifier, as the RegBankSelected +  // property check already is. +  if (!DisableGISelLegalityCheck) +    if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) { +      reportGISelFailure(MF, TPC, MORE, "gisel-select", +                         "instruction is not legal", *MI); +      return false; +    }  #endif    // FIXME: We could introduce new blocks and will need to fix the outer loop.    // Until then, keep track of the number of blocks to assert that we don't. 
@@ -129,12 +117,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {        else          --MII; -      DEBUG(dbgs() << "Selecting: \n  " << MI); +      LLVM_DEBUG(dbgs() << "Selecting: \n  " << MI);        // We could have folded this instruction away already, making it dead.        // If so, erase it.        if (isTriviallyDead(MI, MRI)) { -        DEBUG(dbgs() << "Is dead; erasing.\n"); +        LLVM_DEBUG(dbgs() << "Is dead; erasing.\n");          MI.eraseFromParentAndMarkDBGValuesForRemoval();          continue;        } @@ -147,7 +135,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {        }        // Dump the range of instructions that MI expanded into. -      DEBUG({ +      LLVM_DEBUG({          auto InsertedBegin = ReachedBegin ? MBB->begin() : std::next(MII);          dbgs() << "Into:\n";          for (auto &InsertedMI : make_range(InsertedBegin, AfterIt)) @@ -159,30 +147,63 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); +  for (MachineBasicBlock &MBB : MF) { +    if (MBB.empty()) +      continue; + +    // Try to find redundant copies b/w vregs of the same register class. +    bool ReachedBegin = false; +    for (auto MII = std::prev(MBB.end()), Begin = MBB.begin(); !ReachedBegin;) { +      // Select this instruction. +      MachineInstr &MI = *MII; + +      // And have our iterator point to the next instruction, if there is one. +      if (MII == Begin) +        ReachedBegin = true; +      else +        --MII; +      if (MI.getOpcode() != TargetOpcode::COPY) +        continue; +      unsigned SrcReg = MI.getOperand(1).getReg(); +      unsigned DstReg = MI.getOperand(0).getReg(); +      if (TargetRegisterInfo::isVirtualRegister(SrcReg) && +          TargetRegisterInfo::isVirtualRegister(DstReg)) { +        auto SrcRC = MRI.getRegClass(SrcReg); +        auto DstRC = MRI.getRegClass(DstReg); +        if (SrcRC == DstRC) { +          MRI.replaceRegWith(DstReg, SrcReg); +          MI.eraseFromParentAndMarkDBGValuesForRemoval(); +        } +      } +    } +  } +    // Now that selection is complete, there are no more generic vregs.  Verify    // that the size of the now-constrained vreg is unchanged and that it has a    // register class. 
-  for (auto &VRegToType : MRI.getVRegToType()) { -    unsigned VReg = VRegToType.first; -    auto *RC = MRI.getRegClassOrNull(VReg); +  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { +    unsigned VReg = TargetRegisterInfo::index2VirtReg(I); +      MachineInstr *MI = nullptr;      if (!MRI.def_empty(VReg))        MI = &*MRI.def_instr_begin(VReg);      else if (!MRI.use_empty(VReg))        MI = &*MRI.use_instr_begin(VReg); +    if (!MI) +      continue; -    if (MI && !RC) { +    const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg); +    if (!RC) {        reportGISelFailure(MF, TPC, MORE, "gisel-select",                           "VReg has no regclass after selection", *MI);        return false; -    } else if (!RC) -      continue; +    } -    if (VRegToType.second.isValid() && -        VRegToType.second.getSizeInBits() > TRI.getRegSizeInBits(*RC)) { -      reportGISelFailure(MF, TPC, MORE, "gisel-select", -                         "VReg has explicit size different from class size", -                         *MI); +    const LLT Ty = MRI.getType(VReg); +    if (Ty.isValid() && Ty.getSizeInBits() > TRI.getRegSizeInBits(*RC)) { +      reportGISelFailure( +          MF, TPC, MORE, "gisel-select", +          "VReg's low-level type and register class have different sizes", *MI);        return false;      }    } @@ -199,6 +220,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {    auto &TLI = *MF.getSubtarget().getTargetLowering();    TLI.finalizeLowering(MF); +  LLVM_DEBUG({ +    dbgs() << "Rules covered by selecting function: " << MF.getName() << ":"; +    for (auto RuleID : CoverageInfo.covered()) +      dbgs() << " id" << RuleID; +    dbgs() << "\n\n"; +  });    CoverageInfo.emit(CoveragePrefix,                      MF.getSubtarget()                          .getTargetLowering() @@ -206,6 +233,11 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {                          .getTarget()                          .getBackendName()); +  // If we successfully selected the function nothing is going to use the vreg +  // types after us (otherwise MIRPrinter would need them). Make sure the types +  // disappear. +  MRI.clearVirtRegTypes(); +    // FIXME: Should we accurately track changes?    return true;  } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 88669bd68c00..5e77fcbb0ed9 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -46,50 +46,6 @@ bool InstructionSelector::constrainOperandRegToRegClass(        constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC);  } -bool InstructionSelector::constrainSelectedInstRegOperands( -    MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, -    const RegisterBankInfo &RBI) const { -  MachineBasicBlock &MBB = *I.getParent(); -  MachineFunction &MF = *MBB.getParent(); -  MachineRegisterInfo &MRI = MF.getRegInfo(); - -  for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) { -    MachineOperand &MO = I.getOperand(OpI); - -    // There's nothing to be done on non-register operands. -    if (!MO.isReg()) -      continue; - -    DEBUG(dbgs() << "Converting operand: " << MO << '\n'); -    assert(MO.isReg() && "Unsupported non-reg operand"); - -    unsigned Reg = MO.getReg(); -    // Physical registers don't need to be constrained. 
-    if (TRI.isPhysicalRegister(Reg)) -      continue; - -    // Register operands with a value of 0 (e.g. predicate operands) don't need -    // to be constrained. -    if (Reg == 0) -      continue; - -    // If the operand is a vreg, we should constrain its regclass, and only -    // insert COPYs if that's impossible. -    // constrainOperandRegClass does that for us. -    MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), -                                       Reg, OpI)); - -    // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been -    // done. -    if (MO.isUse()) { -      int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO); -      if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx)) -        I.tieOperands(DefIdx, OpI); -    } -  } -  return true; -} -  bool InstructionSelector::isOperandImmEqual(      const MachineOperand &MO, int64_t Value,      const MachineRegisterInfo &MRI) const { diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp new file mode 100644 index 000000000000..344f573a67f5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -0,0 +1,101 @@ +//===- lib/CodeGen/GlobalISel/LegalizerPredicates.cpp - Predicates --------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A library of predicate factories to use for LegalityPredicate. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +using namespace llvm; + +LegalityPredicate LegalityPredicates::typeIs(unsigned TypeIdx, LLT Type) { +  return +      [=](const LegalityQuery &Query) { return Query.Types[TypeIdx] == Type; }; +} + +LegalityPredicate +LegalityPredicates::typeInSet(unsigned TypeIdx, +                              std::initializer_list<LLT> TypesInit) { +  SmallVector<LLT, 4> Types = TypesInit; +  return [=](const LegalityQuery &Query) { +    return std::find(Types.begin(), Types.end(), Query.Types[TypeIdx]) != Types.end(); +  }; +} + +LegalityPredicate LegalityPredicates::typePairInSet( +    unsigned TypeIdx0, unsigned TypeIdx1, +    std::initializer_list<std::pair<LLT, LLT>> TypesInit) { +  SmallVector<std::pair<LLT, LLT>, 4> Types = TypesInit; +  return [=](const LegalityQuery &Query) { +    std::pair<LLT, LLT> Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1]}; +    return std::find(Types.begin(), Types.end(), Match) != Types.end(); +  }; +} + +LegalityPredicate LegalityPredicates::typePairAndMemSizeInSet( +    unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx, +    std::initializer_list<TypePairAndMemSize> TypesAndMemSizeInit) { +  SmallVector<TypePairAndMemSize, 4> TypesAndMemSize = TypesAndMemSizeInit; +  return [=](const LegalityQuery &Query) { +    TypePairAndMemSize Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1], +                                Query.MMODescrs[MMOIdx].Size}; +    return std::find(TypesAndMemSize.begin(), TypesAndMemSize.end(), Match) != +           TypesAndMemSize.end(); +  }; +} + +LegalityPredicate LegalityPredicates::isScalar(unsigned TypeIdx) { +  return [=](const LegalityQuery &Query) { +    return Query.Types[TypeIdx].isScalar(); +  }; +} + +LegalityPredicate 
LegalityPredicates::narrowerThan(unsigned TypeIdx,
+                                                   unsigned Size) {
+  return [=](const LegalityQuery &Query) {
+    const LLT &QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isScalar() && QueryTy.getSizeInBits() < Size;
+  };
+}
+
+LegalityPredicate LegalityPredicates::widerThan(unsigned TypeIdx,
+                                                unsigned Size) {
+  return [=](const LegalityQuery &Query) {
+    const LLT &QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isScalar() && QueryTy.getSizeInBits() > Size;
+  };
+}
+
+LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT &QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isScalar() && !isPowerOf2_32(QueryTy.getSizeInBits());
+  };
+}
+
+LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
+  return [=](const LegalityQuery &Query) {
+    return !isPowerOf2_32(Query.MMODescrs[MMOIdx].Size /* In Bytes */);
+  };
+}
+
+LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT &QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isVector() && !isPowerOf2_32(QueryTy.getNumElements());
+  };
+}
+
+LegalityPredicate LegalityPredicates::atomicOrderingAtLeastOrStrongerThan(
+    unsigned MMOIdx, AtomicOrdering Ordering) {
+  return [=](const LegalityQuery &Query) {
+    return isAtLeastOrStrongerThan(Query.MMODescrs[MMOIdx].Ordering, Ordering);
+  };
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
new file mode 100644
index 000000000000..a29b32ecdc03
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -0,0 +1,51 @@
+//===- lib/CodeGen/GlobalISel/LegalizerMutations.cpp - Mutations ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A library of mutation factories to use for LegalityMutation.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" + +using namespace llvm; + +LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx, LLT Ty) { +  return +      [=](const LegalityQuery &Query) { return std::make_pair(TypeIdx, Ty); }; +} + +LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx, +                                             unsigned FromTypeIdx) { +  return [=](const LegalityQuery &Query) { +    return std::make_pair(TypeIdx, Query.Types[FromTypeIdx]); +  }; +} + +LegalizeMutation LegalizeMutations::widenScalarToNextPow2(unsigned TypeIdx, +                                                          unsigned Min) { +  return [=](const LegalityQuery &Query) { +    unsigned NewSizeInBits = +        1 << Log2_32_Ceil(Query.Types[TypeIdx].getSizeInBits()); +    if (NewSizeInBits < Min) +      NewSizeInBits = Min; +    return std::make_pair(TypeIdx, LLT::scalar(NewSizeInBits)); +  }; +} + +LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx, +                                                           unsigned Min) { +  return [=](const LegalityQuery &Query) { +    const LLT &VecTy = Query.Types[TypeIdx]; +    unsigned NewNumElements = 1 << Log2_32_Ceil(VecTy.getNumElements()); +    if (NewNumElements < Min) +      NewNumElements = Min; +    return std::make_pair( +        TypeIdx, LLT::vector(NewNumElements, VecTy.getScalarSizeInBits())); +  }; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index f09b0d9f11e7..9a2aac998a84 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -47,6 +47,7 @@ Legalizer::Legalizer() : MachineFunctionPass(ID) {  void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {    AU.addRequired<TargetPassConfig>(); +  getSelectionDAGFallbackAnalysisUsage(AU);    MachineFunctionPass::getAnalysisUsage(AU);  } @@ -72,7 +73,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {    if (MF.getProperties().hasProperty(            MachineFunctionProperties::Property::FailedISel))      return false; -  DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');    init(MF);    const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();    MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); @@ -112,7 +113,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {        else          InstList.insert(MI);      } -    DEBUG(dbgs() << ".. .. New MI: " << *MI;); +    LLVM_DEBUG(dbgs() << ".. .. 
New MI: " << *MI;);    });    const LegalizerInfo &LInfo(Helper.getLegalizerInfo());    LegalizationArtifactCombiner ArtCombiner(Helper.MIRBuilder, MF.getRegInfo(), LInfo); @@ -127,7 +128,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {        MachineInstr &MI = *InstList.pop_back_val();        assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");        if (isTriviallyDead(MI, MRI)) { -        DEBUG(dbgs() << MI << "Is dead; erasing.\n"); +        LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");          MI.eraseFromParentAndMarkDBGValuesForRemoval();          continue;        } @@ -148,7 +149,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {        MachineInstr &MI = *ArtifactList.pop_back_val();        assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");        if (isTriviallyDead(MI, MRI)) { -        DEBUG(dbgs() << MI << "Is dead; erasing.\n"); +        LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");          RemoveDeadInstFromLists(&MI);          MI.eraseFromParentAndMarkDBGValuesForRemoval();          continue; @@ -156,7 +157,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {        SmallVector<MachineInstr *, 4> DeadInstructions;        if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) {          for (auto *DeadMI : DeadInstructions) { -          DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI); +          LLVM_DEBUG(dbgs() << ".. Erasing Dead Instruction " << *DeadMI);            RemoveDeadInstFromLists(DeadMI);            DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();          } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 6bebe180fefd..87086af121b7 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -26,6 +26,7 @@  #define DEBUG_TYPE "legalizer"  using namespace llvm; +using namespace LegalizeActions;  LegalizerHelper::LegalizerHelper(MachineFunction &MF)      : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) { @@ -34,34 +35,34 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF)  LegalizerHelper::LegalizeResult  LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { -  DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); +  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); -  auto Action = LI.getAction(MI, MRI); -  switch (std::get<0>(Action)) { -  case LegalizerInfo::Legal: -    DEBUG(dbgs() << ".. Already legal\n"); +  auto Step = LI.getAction(MI, MRI); +  switch (Step.Action) { +  case Legal: +    LLVM_DEBUG(dbgs() << ".. Already legal\n");      return AlreadyLegal; -  case LegalizerInfo::Libcall: -    DEBUG(dbgs() << ".. Convert to libcall\n"); +  case Libcall: +    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");      return libcall(MI); -  case LegalizerInfo::NarrowScalar: -    DEBUG(dbgs() << ".. Narrow scalar\n"); -    return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action)); -  case LegalizerInfo::WidenScalar: -    DEBUG(dbgs() << ".. Widen scalar\n"); -    return widenScalar(MI, std::get<1>(Action), std::get<2>(Action)); -  case LegalizerInfo::Lower: -    DEBUG(dbgs() << ".. Lower\n"); -    return lower(MI, std::get<1>(Action), std::get<2>(Action)); -  case LegalizerInfo::FewerElements: -    DEBUG(dbgs() << ".. 
Reduce number of elements\n"); -    return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action)); -  case LegalizerInfo::Custom: -    DEBUG(dbgs() << ".. Custom legalization\n"); +  case NarrowScalar: +    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n"); +    return narrowScalar(MI, Step.TypeIdx, Step.NewType); +  case WidenScalar: +    LLVM_DEBUG(dbgs() << ".. Widen scalar\n"); +    return widenScalar(MI, Step.TypeIdx, Step.NewType); +  case Lower: +    LLVM_DEBUG(dbgs() << ".. Lower\n"); +    return lower(MI, Step.TypeIdx, Step.NewType); +  case FewerElements: +    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n"); +    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); +  case Custom: +    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");      return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized                                                    : UnableToLegalize;    default: -    DEBUG(dbgs() << ".. Unable to legalize\n"); +    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");      return UnableToLegalize;    }  } @@ -103,6 +104,9 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {      return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;    case TargetOpcode::G_FPOW:      return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32; +  case TargetOpcode::G_FMA: +    assert((Size == 32 || Size == 64) && "Unsupported size"); +    return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;    }    llvm_unreachable("Unknown libcall function");  } @@ -123,13 +127,47 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,    return LegalizerHelper::Legalized;  } +// Useful for libcalls where all operands have the same type.  static LegalizerHelper::LegalizeResult  simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,                Type *OpType) {    auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + +  SmallVector<CallLowering::ArgInfo, 3> Args; +  for (unsigned i = 1; i < MI.getNumOperands(); i++) +    Args.push_back({MI.getOperand(i).getReg(), OpType});    return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, -                       {{MI.getOperand(1).getReg(), OpType}, -                        {MI.getOperand(2).getReg(), OpType}}); +                       Args); +} + +static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, +                                       Type *FromType) { +  auto ToMVT = MVT::getVT(ToType); +  auto FromMVT = MVT::getVT(FromType); + +  switch (Opcode) { +  case TargetOpcode::G_FPEXT: +    return RTLIB::getFPEXT(FromMVT, ToMVT); +  case TargetOpcode::G_FPTRUNC: +    return RTLIB::getFPROUND(FromMVT, ToMVT); +  case TargetOpcode::G_FPTOSI: +    return RTLIB::getFPTOSINT(FromMVT, ToMVT); +  case TargetOpcode::G_FPTOUI: +    return RTLIB::getFPTOUINT(FromMVT, ToMVT); +  case TargetOpcode::G_SITOFP: +    return RTLIB::getSINTTOFP(FromMVT, ToMVT); +  case TargetOpcode::G_UITOFP: +    return RTLIB::getUINTTOFP(FromMVT, ToMVT); +  } +  llvm_unreachable("Unsupported libcall function"); +} + +static LegalizerHelper::LegalizeResult +conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, +                  Type *FromType) { +  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType); +  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType}, +                       {{MI.getOperand(1).getReg(), FromType}});  }  LegalizerHelper::LegalizeResult @@ -157,6 +195,7 @@ 
LegalizerHelper::libcall(MachineInstr &MI) {    case TargetOpcode::G_FSUB:    case TargetOpcode::G_FMUL:    case TargetOpcode::G_FDIV: +  case TargetOpcode::G_FMA:    case TargetOpcode::G_FPOW:    case TargetOpcode::G_FREM: {      Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); @@ -165,6 +204,59 @@ LegalizerHelper::libcall(MachineInstr &MI) {        return Status;      break;    } +  case TargetOpcode::G_FPEXT: { +    // FIXME: Support other floating point types (half, fp128 etc) +    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); +    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); +    if (ToSize != 64 || FromSize != 32) +      return UnableToLegalize; +    LegalizeResult Status = conversionLibcall( +        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx)); +    if (Status != Legalized) +      return Status; +    break; +  } +  case TargetOpcode::G_FPTRUNC: { +    // FIXME: Support other floating point types (half, fp128 etc) +    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); +    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); +    if (ToSize != 32 || FromSize != 64) +      return UnableToLegalize; +    LegalizeResult Status = conversionLibcall( +        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx)); +    if (Status != Legalized) +      return Status; +    break; +  } +  case TargetOpcode::G_FPTOSI: +  case TargetOpcode::G_FPTOUI: { +    // FIXME: Support other types +    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); +    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); +    if (ToSize != 32 || (FromSize != 32 && FromSize != 64)) +      return UnableToLegalize; +    LegalizeResult Status = conversionLibcall( +        MI, MIRBuilder, Type::getInt32Ty(Ctx), +        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx)); +    if (Status != Legalized) +      return Status; +    break; +  } +  case TargetOpcode::G_SITOFP: +  case TargetOpcode::G_UITOFP: { +    // FIXME: Support other types +    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); +    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); +    if (FromSize != 32 || (ToSize != 32 && ToSize != 64)) +      return UnableToLegalize; +    LegalizeResult Status = conversionLibcall( +        MI, MIRBuilder, +        ToSize == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), +        Type::getInt32Ty(Ctx)); +    if (Status != Legalized) +      return Status; +    break; +  }    }    MI.eraseFromParent(); @@ -180,8 +272,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,    MIRBuilder.setInstr(MI); -  int64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); -  int64_t NarrowSize = NarrowTy.getSizeInBits(); +  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); +  uint64_t NarrowSize = NarrowTy.getSizeInBits();    switch (MI.getOpcode()) {    default: @@ -194,11 +286,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,      int NumParts = SizeOp0 / NarrowSize;      SmallVector<unsigned, 2> DstRegs; -    for (int i = 0; i < NumParts; ++i) { -      unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy); -      MIRBuilder.buildUndef(Dst); -      DstRegs.push_back(Dst); -    } +    for (int i = 0; i < NumParts; ++i) +      DstRegs.push_back( +          MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());      MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);      MI.eraseFromParent();      return Legalized; @@ -249,8 +339,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,      extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);      unsigned OpReg = MI.getOperand(0).getReg(); -    int64_t OpStart = MI.getOperand(2).getImm(); -    int64_t OpSize = MRI.getType(OpReg).getSizeInBits(); +    uint64_t OpStart = MI.getOperand(2).getImm(); +    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();      for (int i = 0; i < NumParts; ++i) {        unsigned SrcStart = i * NarrowSize; @@ -265,7 +355,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,        // OpSegStart is where this destination segment would start in OpReg if it        // extended infinitely in both directions. -      int64_t ExtractOffset, SegSize; +      int64_t ExtractOffset; +      uint64_t SegSize;        if (OpStart < SrcStart) {          ExtractOffset = 0;          SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); @@ -301,8 +392,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,      extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);      unsigned OpReg = MI.getOperand(2).getReg(); -    int64_t OpStart = MI.getOperand(3).getImm(); -    int64_t OpSize = MRI.getType(OpReg).getSizeInBits(); +    uint64_t OpStart = MI.getOperand(3).getImm(); +    uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();      for (int i = 0; i < NumParts; ++i) {        unsigned DstStart = i * NarrowSize; @@ -319,7 +410,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,        // OpSegStart is where this destination segment would start in OpReg if it        // extended infinitely in both directions. -      int64_t ExtractOffset, InsertOffset, SegSize; +      int64_t ExtractOffset, InsertOffset; +      uint64_t SegSize;        if (OpStart < DstStart) {          InsertOffset = 0;          ExtractOffset = DstStart - OpStart; @@ -353,6 +445,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,      // NarrowSize.      if (SizeOp0 % NarrowSize != 0)        return UnableToLegalize; + +    const auto &MMO = **MI.memoperands_begin(); +    // This implementation doesn't work for atomics. Give up instead of doing +    // something invalid. 
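(The ExtractOffset/SegSize arithmetic in the G_EXTRACT/G_INSERT hunks above is, at heart, interval intersection: each narrow part copies only the bits where the operand's range and the part's range overlap. The underlying computation standalone; segmentOverlap is a hypothetical name and units are bits, as in the hunk.)

    #include <algorithm>
    #include <cstdint>
    #include <optional>
    #include <utility>

    // Overlap of [OpStart, OpStart+OpSize) with one narrow part
    // [PartStart, PartStart+NarrowSize): offset within the part and the
    // number of bits covered, or nothing when the ranges are disjoint.
    std::optional<std::pair<uint64_t, uint64_t>>
    segmentOverlap(uint64_t OpStart, uint64_t OpSize, uint64_t PartStart,
                   uint64_t NarrowSize) {
      uint64_t Begin = std::max(OpStart, PartStart);
      uint64_t End = std::min(OpStart + OpSize, PartStart + NarrowSize);
      if (Begin >= End)
        return std::nullopt;
      return std::make_pair(Begin - PartStart, End - Begin);
    }
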
+    if (MMO.getOrdering() != AtomicOrdering::NotAtomic || +        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) +      return UnableToLegalize; +      int NumParts = SizeOp0 / NarrowSize;      LLT OffsetTy = LLT::scalar(          MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); @@ -363,12 +463,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,        unsigned SrcReg = 0;        unsigned Adjustment = i * NarrowSize / 8; +      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( +          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), +          NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8, +          MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(), +          MMO.getOrdering(), MMO.getFailureOrdering()); +        MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,                                  Adjustment); -      // TODO: This is conservatively correct, but we probably want to split the -      // memory operands in the future. -      MIRBuilder.buildLoad(DstReg, SrcReg, **MI.memoperands_begin()); +      MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);        DstRegs.push_back(DstReg);      } @@ -382,6 +486,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,      // NarrowSize.      if (SizeOp0 % NarrowSize != 0)        return UnableToLegalize; + +    const auto &MMO = **MI.memoperands_begin(); +    // This implementation doesn't work for atomics. Give up instead of doing +    // something invalid. +    if (MMO.getOrdering() != AtomicOrdering::NotAtomic || +        MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) +      return UnableToLegalize; +      int NumParts = SizeOp0 / NarrowSize;      LLT OffsetTy = LLT::scalar(          MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); @@ -393,12 +505,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,        unsigned DstReg = 0;        unsigned Adjustment = i * NarrowSize / 8; +      MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( +          MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), +          NarrowSize / 8, i == 0 ? MMO.getAlignment() : NarrowSize / 8, +          MMO.getAAInfo(), MMO.getRanges(), MMO.getSyncScopeID(), +          MMO.getOrdering(), MMO.getFailureOrdering()); +        MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,                                  Adjustment); -      // TODO: This is conservatively correct, but we probably want to split the -      // memory operands in the future. 
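(The split memory operands built above follow a simple rule: part i starts at i times the narrow size in bytes, and only the first part may keep the original alignment, while later parts can only be assumed aligned to the part size itself. A standalone model; MemPart and splitMemOp are illustrative names and sizes are in bytes.)

    #include <vector>

    struct MemPart { unsigned Offset, Bytes, Align; };

    std::vector<MemPart> splitMemOp(unsigned TotalBytes, unsigned PartBytes,
                                    unsigned OrigAlign) {
      std::vector<MemPart> Parts;
      for (unsigned Off = 0; Off < TotalBytes; Off += PartBytes)
        Parts.push_back({Off, PartBytes,
                         Off == 0 ? OrigAlign : PartBytes}); // first keeps OrigAlign
      return Parts;
    }
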
-      MIRBuilder.buildStore(SrcRegs[i], DstReg, **MI.memoperands_begin()); +      MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);      }      MI.eraseFromParent();      return Legalized; @@ -475,6 +591,22 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,    }  } +void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy, +                                     unsigned OpIdx, unsigned ExtOpcode) { +  MachineOperand &MO = MI.getOperand(OpIdx); +  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, WideTy, MO.getReg()); +  MO.setReg(ExtB->getOperand(0).getReg()); +} + +void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy, +                                     unsigned OpIdx, unsigned TruncOpcode) { +  MachineOperand &MO = MI.getOperand(OpIdx); +  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); +  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); +  MIRBuilder.buildInstr(TruncOpcode, MO.getReg(), DstExt); +  MO.setReg(DstExt); +} +  LegalizerHelper::LegalizeResult  LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {    MIRBuilder.setInstr(MI); @@ -482,303 +614,201 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {    switch (MI.getOpcode()) {    default:      return UnableToLegalize; +    case TargetOpcode::G_ADD:    case TargetOpcode::G_AND:    case TargetOpcode::G_MUL:    case TargetOpcode::G_OR:    case TargetOpcode::G_XOR:    case TargetOpcode::G_SUB: -  case TargetOpcode::G_SHL: {      // Perform operation at larger width (any extension is fine here, high bits      // don't affect the result) and then truncate the result back to the      // original type. -    unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy); -    unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(1).getReg()); -    MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(2).getReg()); - -    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildInstr(MI.getOpcode()) -        .addDef(DstExt) -        .addUse(Src1Ext) -        .addUse(Src2Ext); - -    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); -    MI.eraseFromParent(); +    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); +    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); +    widenScalarDst(MI, WideTy); +    MIRBuilder.recordInsertion(&MI);      return Legalized; -  } + +  case TargetOpcode::G_SHL: +    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); +    // The "number of bits to shift" operand must preserve its value as an +    // unsigned integer: +    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); +    widenScalarDst(MI, WideTy); +    MIRBuilder.recordInsertion(&MI); +    return Legalized; +    case TargetOpcode::G_SDIV: -  case TargetOpcode::G_UDIV:    case TargetOpcode::G_SREM: -  case TargetOpcode::G_UREM: +    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); +    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); +    widenScalarDst(MI, WideTy); +    MIRBuilder.recordInsertion(&MI); +    return Legalized; +    case TargetOpcode::G_ASHR: -  case TargetOpcode::G_LSHR: { -    unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV || -                             MI.getOpcode() == TargetOpcode::G_SREM || -                             MI.getOpcode() == TargetOpcode::G_ASHR -                         ? 
TargetOpcode::G_SEXT -                         : TargetOpcode::G_ZEXT; - -    unsigned LHSExt = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildInstr(ExtOp).addDef(LHSExt).addUse( -        MI.getOperand(1).getReg()); - -    unsigned RHSExt = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildInstr(ExtOp).addDef(RHSExt).addUse( -        MI.getOperand(2).getReg()); - -    unsigned ResExt = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildInstr(MI.getOpcode()) -        .addDef(ResExt) -        .addUse(LHSExt) -        .addUse(RHSExt); - -    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), ResExt); -    MI.eraseFromParent(); +    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); +    // The "number of bits to shift" operand must preserve its value as an +    // unsigned integer: +    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); +    widenScalarDst(MI, WideTy); +    MIRBuilder.recordInsertion(&MI);      return Legalized; -  } -  case TargetOpcode::G_SELECT: { + +  case TargetOpcode::G_UDIV: +  case TargetOpcode::G_UREM: +  case TargetOpcode::G_LSHR: +    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); +    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); +    widenScalarDst(MI, WideTy); +    MIRBuilder.recordInsertion(&MI); +    return Legalized; + +  case TargetOpcode::G_SELECT:      if (TypeIdx != 0)        return UnableToLegalize; -      // Perform operation at larger width (any extension is fine here, high bits      // don't affect the result) and then truncate the result back to the      // original type. -    unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy); -    unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(2).getReg()); -    MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(3).getReg()); - -    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildInstr(TargetOpcode::G_SELECT) -        .addDef(DstExt) -        .addReg(MI.getOperand(1).getReg()) -        .addUse(Src1Ext) -        .addUse(Src2Ext); - -    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); -    MI.eraseFromParent(); +    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); +    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT); +    widenScalarDst(MI, WideTy); +    MIRBuilder.recordInsertion(&MI);      return Legalized; -  } +    case TargetOpcode::G_FPTOSI: -  case TargetOpcode::G_FPTOUI: { +  case TargetOpcode::G_FPTOUI:      if (TypeIdx != 0)        return UnableToLegalize; - -    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildInstr(MI.getOpcode()) -        .addDef(DstExt) -        .addUse(MI.getOperand(1).getReg()); - -    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); -    MI.eraseFromParent(); +    widenScalarDst(MI, WideTy); +    MIRBuilder.recordInsertion(&MI);      return Legalized; -  } +    case TargetOpcode::G_SITOFP: -  case TargetOpcode::G_UITOFP: {      if (TypeIdx != 1)        return UnableToLegalize; +    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); +    MIRBuilder.recordInsertion(&MI); +    return Legalized; -    unsigned Src = MI.getOperand(1).getReg(); -    unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy); - -    if (MI.getOpcode() == TargetOpcode::G_SITOFP) { -      MIRBuilder.buildSExt(SrcExt, Src); -    } else { -      assert(MI.getOpcode() == TargetOpcode::G_UITOFP && "Unexpected conv op"); -      MIRBuilder.buildZExt(SrcExt, Src); -    } - -    
MIRBuilder.buildInstr(MI.getOpcode()) -        .addDef(MI.getOperand(0).getReg()) -        .addUse(SrcExt); - -    MI.eraseFromParent(); +  case TargetOpcode::G_UITOFP: +    if (TypeIdx != 1) +      return UnableToLegalize; +    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); +    MIRBuilder.recordInsertion(&MI);      return Legalized; -  } -  case TargetOpcode::G_INSERT: { + +  case TargetOpcode::G_INSERT:      if (TypeIdx != 0)        return UnableToLegalize; - -    unsigned Src = MI.getOperand(1).getReg(); -    unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildAnyExt(SrcExt, Src); - -    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); -    auto MIB = MIRBuilder.buildInsert(DstExt, SrcExt, MI.getOperand(2).getReg(), -                                      MI.getOperand(3).getImm()); -    for (unsigned OpNum = 4; OpNum < MI.getNumOperands(); OpNum += 2) { -      MIB.addReg(MI.getOperand(OpNum).getReg()); -      MIB.addImm(MI.getOperand(OpNum + 1).getImm()); -    } - -    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); -    MI.eraseFromParent(); +    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); +    widenScalarDst(MI, WideTy); +    MIRBuilder.recordInsertion(&MI);      return Legalized; -  } -  case TargetOpcode::G_LOAD: { -    assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) == -               WideTy.getSizeInBits() && -           "illegal to increase number of bytes loaded"); - -    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildLoad(DstExt, MI.getOperand(1).getReg(), -                         **MI.memoperands_begin()); -    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); -    MI.eraseFromParent(); + +  case TargetOpcode::G_LOAD: +    // For some types like i24, we might try to widen to i32. To properly handle +    // this we should be using a dedicated extending load, until then avoid +    // trying to legalize. 
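(The guard this comment introduces can be stated in isolation: widening a plain, non-extending load is byte-exact only when the narrow type padded to whole bytes already matches the wide size. A standalone sketch, with the interesting cases as static_asserts; both function names are illustrative.)

    constexpr unsigned alignToBytes(unsigned Bits) {
      return (Bits + 7) / 8 * 8; // round bit width up to whole bytes
    }

    constexpr bool canWidenPlainLoad(unsigned NarrowBits, unsigned WideBits) {
      return alignToBytes(NarrowBits) == WideBits; // no extra bytes read
    }

    static_assert(canWidenPlainLoad(1, 8), "an i1 load already reads one byte");
    static_assert(!canWidenPlainLoad(24, 32), "i24 -> i32 would read a 4th byte");
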
+    if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) != +        WideTy.getSizeInBits()) +      return UnableToLegalize; +    LLVM_FALLTHROUGH; +  case TargetOpcode::G_SEXTLOAD: +  case TargetOpcode::G_ZEXTLOAD: +    widenScalarDst(MI, WideTy); +    MIRBuilder.recordInsertion(&MI);      return Legalized; -  } +    case TargetOpcode::G_STORE: {      if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||          WideTy != LLT::scalar(8))        return UnableToLegalize; -    auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); -    auto Content = TLI.getBooleanContents(false, false); - -    unsigned ExtOp = TargetOpcode::G_ANYEXT; -    if (Content == TargetLoweringBase::ZeroOrOneBooleanContent) -      ExtOp = TargetOpcode::G_ZEXT; -    else if (Content == TargetLoweringBase::ZeroOrNegativeOneBooleanContent) -      ExtOp = TargetOpcode::G_SEXT; -    else -      ExtOp = TargetOpcode::G_ANYEXT; - -    unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildInstr(ExtOp).addDef(SrcExt).addUse( -        MI.getOperand(0).getReg()); -    MIRBuilder.buildStore(SrcExt, MI.getOperand(1).getReg(), -                          **MI.memoperands_begin()); -    MI.eraseFromParent(); +    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT); +    MIRBuilder.recordInsertion(&MI);      return Legalized;    }    case TargetOpcode::G_CONSTANT: { -    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildConstant(DstExt, *MI.getOperand(1).getCImm()); -    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt); -    MI.eraseFromParent(); +    MachineOperand &SrcMO = MI.getOperand(1); +    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); +    const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits()); +    SrcMO.setCImm(ConstantInt::get(Ctx, Val)); + +    widenScalarDst(MI, WideTy); +    MIRBuilder.recordInsertion(&MI);      return Legalized;    }    case TargetOpcode::G_FCONSTANT: { -    unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); -    const ConstantFP *CFP = MI.getOperand(1).getFPImm(); -    APFloat Val = CFP->getValueAPF(); +    MachineOperand &SrcMO = MI.getOperand(1);      LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); -    auto LLT2Sem = [](LLT Ty) { -      switch (Ty.getSizeInBits()) { -      case 32: -        return &APFloat::IEEEsingle(); -        break; -      case 64: -        return &APFloat::IEEEdouble(); -        break; -      default: -        llvm_unreachable("Unhandled fp widen type"); -      } -    }; +    APFloat Val = SrcMO.getFPImm()->getValueAPF();      bool LosesInfo; -    Val.convert(*LLT2Sem(WideTy), APFloat::rmTowardZero, &LosesInfo); -    MIRBuilder.buildFConstant(DstExt, *ConstantFP::get(Ctx, Val)); -    MIRBuilder.buildFPTrunc(MI.getOperand(0).getReg(), DstExt); -    MI.eraseFromParent(); +    switch (WideTy.getSizeInBits()) { +    case 32: +      Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo); +      break; +    case 64: +      Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo); +      break; +    default: +      llvm_unreachable("Unhandled fp widen type"); +    } +    SrcMO.setFPImm(ConstantFP::get(Ctx, Val)); + +    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); +    MIRBuilder.recordInsertion(&MI);      return Legalized;    } -  case TargetOpcode::G_BRCOND: { -    unsigned TstExt = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildAnyExt(TstExt, 
MI.getOperand(0).getReg()); -    MIRBuilder.buildBrCond(TstExt, *MI.getOperand(1).getMBB()); -    MI.eraseFromParent(); +  case TargetOpcode::G_BRCOND: +    widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT); +    MIRBuilder.recordInsertion(&MI);      return Legalized; -  } -  case TargetOpcode::G_FCMP: { -    unsigned Op0Ext, Op1Ext, DstReg; -    unsigned Cmp1 = MI.getOperand(2).getReg(); -    unsigned Cmp2 = MI.getOperand(3).getReg(); -    if (TypeIdx == 0) { -      Op0Ext = Cmp1; -      Op1Ext = Cmp2; -      DstReg = MRI.createGenericVirtualRegister(WideTy); -    } else { -      Op0Ext = MRI.createGenericVirtualRegister(WideTy); -      Op1Ext = MRI.createGenericVirtualRegister(WideTy); -      DstReg = MI.getOperand(0).getReg(); -      MIRBuilder.buildInstr(TargetOpcode::G_FPEXT, Op0Ext, Cmp1); -      MIRBuilder.buildInstr(TargetOpcode::G_FPEXT, Op1Ext, Cmp2); -    } -    MIRBuilder.buildFCmp( -        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()), -        DstReg, Op0Ext, Op1Ext); + +  case TargetOpcode::G_FCMP:      if (TypeIdx == 0) -      MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, MI.getOperand(0).getReg(), -                            DstReg); -    MI.eraseFromParent(); -    return Legalized; -  } -  case TargetOpcode::G_ICMP: { -    bool IsSigned = CmpInst::isSigned( -        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate())); -    unsigned Cmp1 = MI.getOperand(2).getReg(); -    unsigned Cmp2 = MI.getOperand(3).getReg(); -    unsigned Op0Ext, Op1Ext, DstReg; -    if (TypeIdx == 0) { -      Op0Ext = Cmp1; -      Op1Ext = Cmp2; -      DstReg = MRI.createGenericVirtualRegister(WideTy); -    } else { -      Op0Ext = MRI.createGenericVirtualRegister(WideTy); -      Op1Ext = MRI.createGenericVirtualRegister(WideTy); -      DstReg = MI.getOperand(0).getReg(); -      if (IsSigned) { -        MIRBuilder.buildSExt(Op0Ext, Cmp1); -        MIRBuilder.buildSExt(Op1Ext, Cmp2); -      } else { -        MIRBuilder.buildZExt(Op0Ext, Cmp1); -        MIRBuilder.buildZExt(Op1Ext, Cmp2); -      } +      widenScalarDst(MI, WideTy); +    else { +      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT); +      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);      } -    MIRBuilder.buildICmp( -        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()), -        DstReg, Op0Ext, Op1Ext); +    MIRBuilder.recordInsertion(&MI); +    return Legalized; + +  case TargetOpcode::G_ICMP:      if (TypeIdx == 0) -      MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, MI.getOperand(0).getReg(), -                            DstReg); -    MI.eraseFromParent(); +      widenScalarDst(MI, WideTy); +    else { +      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>( +                               MI.getOperand(1).getPredicate())) +                               ? 
TargetOpcode::G_SEXT +                               : TargetOpcode::G_ZEXT; +      widenScalarSrc(MI, WideTy, 2, ExtOpcode); +      widenScalarSrc(MI, WideTy, 3, ExtOpcode); +    } +    MIRBuilder.recordInsertion(&MI);      return Legalized; -  } -  case TargetOpcode::G_GEP: { + +  case TargetOpcode::G_GEP:      assert(TypeIdx == 1 && "unable to legalize pointer of GEP"); -    unsigned OffsetExt = MRI.createGenericVirtualRegister(WideTy); -    MIRBuilder.buildSExt(OffsetExt, MI.getOperand(2).getReg()); -    MI.getOperand(2).setReg(OffsetExt); +    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); +    MIRBuilder.recordInsertion(&MI);      return Legalized; -  } +    case TargetOpcode::G_PHI: {      assert(TypeIdx == 0 && "Expecting only Idx 0"); -    auto getExtendedReg = [&](unsigned Reg, MachineBasicBlock &MBB) { -      auto FirstTermIt = MBB.getFirstTerminator(); -      MIRBuilder.setInsertPt(MBB, FirstTermIt); -      MachineInstr *DefMI = MRI.getVRegDef(Reg); -      MachineInstrBuilder MIB; -      if (DefMI->getOpcode() == TargetOpcode::G_TRUNC) -        MIB = MIRBuilder.buildAnyExtOrTrunc(WideTy, -                                            DefMI->getOperand(1).getReg()); -      else -        MIB = MIRBuilder.buildAnyExt(WideTy, Reg); -      return MIB->getOperand(0).getReg(); -    }; -    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, WideTy); -    for (auto OpIt = MI.operands_begin() + 1, OpE = MI.operands_end(); -         OpIt != OpE;) { -      unsigned Reg = OpIt++->getReg(); -      MachineBasicBlock *OpMBB = OpIt++->getMBB(); -      MIB.addReg(getExtendedReg(Reg, *OpMBB)); -      MIB.addMBB(OpMBB); + +    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) { +      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); +      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); +      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);      } -    auto *MBB = MI.getParent(); -    MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI()); -    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), -                          MIB->getOperand(0).getReg()); -    MI.eraseFromParent(); + +    MachineBasicBlock &MBB = *MI.getParent(); +    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); +    widenScalarDst(MI, WideTy); +    MIRBuilder.recordInsertion(&MI);      return Legalized;    }    } @@ -874,11 +904,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {      }      ConstantFP &ZeroForNegation =          *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy)); -    unsigned Zero = MRI.createGenericVirtualRegister(Ty); -    MIRBuilder.buildFConstant(Zero, ZeroForNegation); +    auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);      MIRBuilder.buildInstr(TargetOpcode::G_FSUB)          .addDef(Res) -        .addUse(Zero) +        .addUse(Zero->getOperand(0).getReg())          .addUse(MI.getOperand(1).getReg());      MI.eraseFromParent();      return Legalized; @@ -887,7 +916,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {      // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).      // First, check if G_FNEG is marked as Lower. If so, we may      // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. 
-    if (LI.getAction({G_FNEG, Ty}).first == LegalizerInfo::Lower) +    if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)        return UnableToLegalize;      unsigned Res = MI.getOperand(0).getReg();      unsigned LHS = MI.getOperand(1).getReg(); @@ -913,6 +942,48 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {      MI.eraseFromParent();      return Legalized;    } +  case TargetOpcode::G_LOAD: +  case TargetOpcode::G_SEXTLOAD: +  case TargetOpcode::G_ZEXTLOAD: { +    // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT +    unsigned DstReg = MI.getOperand(0).getReg(); +    unsigned PtrReg = MI.getOperand(1).getReg(); +    LLT DstTy = MRI.getType(DstReg); +    auto &MMO = **MI.memoperands_begin(); + +    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { +      // In the case of G_LOAD, this was a non-extending load already and we're +      // about to lower to the same instruction. +      if (MI.getOpcode() == TargetOpcode::G_LOAD) +          return UnableToLegalize; +      MIRBuilder.buildLoad(DstReg, PtrReg, MMO); +      MI.eraseFromParent(); +      return Legalized; +    } + +    if (DstTy.isScalar()) { +      unsigned TmpReg = MRI.createGenericVirtualRegister( +          LLT::scalar(MMO.getSize() /* in bytes */ * 8)); +      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); +      switch (MI.getOpcode()) { +      default: +        llvm_unreachable("Unexpected opcode"); +      case TargetOpcode::G_LOAD: +        MIRBuilder.buildAnyExt(DstReg, TmpReg); +        break; +      case TargetOpcode::G_SEXTLOAD: +        MIRBuilder.buildSExt(DstReg, TmpReg); +        break; +      case TargetOpcode::G_ZEXTLOAD: +        MIRBuilder.buildZExt(DstReg, TmpReg); +        break; +      } +      MI.eraseFromParent(); +      return Legalized; +    } + +    return UnableToLegalize; +  }    }  } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 9c27c59a0654..ae061b64a38c 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -24,12 +24,87 @@  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/TargetOpcodes.h"  #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/LowLevelTypeImpl.h"  #include "llvm/Support/MathExtras.h"  #include <algorithm>  #include <map> +  using namespace llvm; +using namespace LegalizeActions; + +#define DEBUG_TYPE "legalizer-info" + +cl::opt<bool> llvm::DisableGISelLegalityCheck( +    "disable-gisel-legality-check", +    cl::desc("Don't verify that MIR is fully legal between GlobalISel passes"), +    cl::Hidden); + +raw_ostream &LegalityQuery::print(raw_ostream &OS) const { +  OS << Opcode << ", Tys={"; +  for (const auto &Type : Types) { +    OS << Type << ", "; +  } +  OS << "}, Opcode="; + +  OS << Opcode << ", MMOs={"; +  for (const auto &MMODescr : MMODescrs) { +    OS << MMODescr.Size << ", "; +  } +  OS << "}"; + +  return OS; +} + +LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const { +  LLVM_DEBUG(dbgs() << "Applying legalizer ruleset to: "; Query.print(dbgs()); +             dbgs() << "\n"); +  if (Rules.empty()) { +    LLVM_DEBUG(dbgs() << ".. 
fallback to legacy rules (no rules defined)\n"); +    return {LegalizeAction::UseLegacyRules, 0, LLT{}}; +  } +  for (const auto &Rule : Rules) { +    if (Rule.match(Query)) { +      LLVM_DEBUG(dbgs() << ".. match\n"); +      std::pair<unsigned, LLT> Mutation = Rule.determineMutation(Query); +      LLVM_DEBUG(dbgs() << ".. .. " << (unsigned)Rule.getAction() << ", " +                        << Mutation.first << ", " << Mutation.second << "\n"); +      assert((Query.Types[Mutation.first] != Mutation.second || +              Rule.getAction() == Lower || +              Rule.getAction() == MoreElements || +              Rule.getAction() == FewerElements) && +             "Simple loop detected"); +      return {Rule.getAction(), Mutation.first, Mutation.second}; +    } else +      LLVM_DEBUG(dbgs() << ".. no match\n"); +  } +  LLVM_DEBUG(dbgs() << ".. unsupported\n"); +  return {LegalizeAction::Unsupported, 0, LLT{}}; +} + +bool LegalizeRuleSet::verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const { +#ifndef NDEBUG +  if (Rules.empty()) { +    LLVM_DEBUG( +        dbgs() << ".. type index coverage check SKIPPED: no rules defined\n"); +    return true; +  } +  const int64_t FirstUncovered = TypeIdxsCovered.find_first_unset(); +  if (FirstUncovered < 0) { +    LLVM_DEBUG(dbgs() << ".. type index coverage check SKIPPED:" +                         " user-defined predicate detected\n"); +    return true; +  } +  const bool AllCovered = (FirstUncovered >= NumTypeIdxs); +  LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered +                    << ", " << (AllCovered ? "OK" : "FAIL") << "\n"); +  return AllCovered; +#else +  return true; +#endif +}  LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {    // Set defaults. @@ -104,15 +179,16 @@ void LegalizerInfo::computeTables() {          if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&              ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)            S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx]; -        std::sort(ScalarSpecifiedActions.begin(), ScalarSpecifiedActions.end()); +        llvm::sort(ScalarSpecifiedActions.begin(), +                   ScalarSpecifiedActions.end());          checkPartialSizeAndActionsVector(ScalarSpecifiedActions);          setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));        }        // 2. Handle pointer types        for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) { -        std::sort(PointerSpecifiedActions.second.begin(), -                  PointerSpecifiedActions.second.end()); +        llvm::sort(PointerSpecifiedActions.second.begin(), +                   PointerSpecifiedActions.second.end());          checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);          // For pointer types, we assume that there isn't a meaningful way          // to change the number of bits used in the pointer. @@ -124,8 +200,8 @@ void LegalizerInfo::computeTables() {        // 3. 
Handle vector types        SizeAndActionsVec ElementSizesSeen;        for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) { -        std::sort(VectorSpecifiedActions.second.begin(), -                  VectorSpecifiedActions.second.end()); +        llvm::sort(VectorSpecifiedActions.second.begin(), +                   VectorSpecifiedActions.second.end());          const uint16_t ElementSize = VectorSpecifiedActions.first;          ElementSizesSeen.push_back({ElementSize, Legal});          checkPartialSizeAndActionsVector(VectorSpecifiedActions.second); @@ -143,7 +219,7 @@ void LegalizerInfo::computeTables() {              Opcode, TypeIdx, ElementSize,              moreToWiderTypesAndLessToWidest(NumElementsActions));        } -      std::sort(ElementSizesSeen.begin(), ElementSizesSeen.end()); +      llvm::sort(ElementSizesSeen.begin(), ElementSizesSeen.end());        SizeChangeStrategy VectorElementSizeChangeStrategy =            &unsupportedForDifferentSizes;        if (TypeIdx < VectorElementSizeChangeStrategies[OpcodeIdx].size() && @@ -162,8 +238,8 @@ void LegalizerInfo::computeTables() {  // probably going to need specialized lookup structures for various types before  // we have any hope of doing well with something like <13 x i3>. Even the common  // cases should do better than what we have now. -std::pair<LegalizerInfo::LegalizeAction, LLT> -LegalizerInfo::getAction(const InstrAspect &Aspect) const { +std::pair<LegalizeAction, LLT> +LegalizerInfo::getAspectAction(const InstrAspect &Aspect) const {    assert(TablesInitialized && "backend forgot to call computeTables");    // These *have* to be implemented for now, they're the fundamental basis of    // how everything else is transformed. @@ -186,9 +262,87 @@ static LLT getTypeFromTypeIdx(const MachineInstr &MI,    return MRI.getType(MI.getOperand(OpIdx).getReg());  } -std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT> +unsigned LegalizerInfo::getOpcodeIdxForOpcode(unsigned Opcode) const { +  assert(Opcode >= FirstOp && Opcode <= LastOp && "Unsupported opcode"); +  return Opcode - FirstOp; +} + +unsigned LegalizerInfo::getActionDefinitionsIdx(unsigned Opcode) const { +  unsigned OpcodeIdx = getOpcodeIdxForOpcode(Opcode); +  if (unsigned Alias = RulesForOpcode[OpcodeIdx].getAlias()) { +    LLVM_DEBUG(dbgs() << ".. opcode " << Opcode << " is aliased to " << Alias +                      << "\n"); +    OpcodeIdx = getOpcodeIdxForOpcode(Alias); +    LLVM_DEBUG(dbgs() << ".. 
opcode " << Alias << " is aliased to " +                      << RulesForOpcode[OpcodeIdx].getAlias() << "\n"); +    assert(RulesForOpcode[OpcodeIdx].getAlias() == 0 && "Cannot chain aliases"); +  } + +  return OpcodeIdx; +} + +const LegalizeRuleSet & +LegalizerInfo::getActionDefinitions(unsigned Opcode) const { +  unsigned OpcodeIdx = getActionDefinitionsIdx(Opcode); +  return RulesForOpcode[OpcodeIdx]; +} + +LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(unsigned Opcode) { +  unsigned OpcodeIdx = getActionDefinitionsIdx(Opcode); +  auto &Result = RulesForOpcode[OpcodeIdx]; +  assert(!Result.isAliasedByAnother() && "Modifying this opcode will modify aliases"); +  return Result; +} + +LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder( +    std::initializer_list<unsigned> Opcodes) { +  unsigned Representative = *Opcodes.begin(); + +  assert(Opcodes.begin() != Opcodes.end() && +         Opcodes.begin() + 1 != Opcodes.end() && +         "Initializer list must have at least two opcodes"); + +  for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I) +    aliasActionDefinitions(Representative, *I); + +  auto &Return = getActionDefinitionsBuilder(Representative); +  Return.setIsAliasedByAnother(); +  return Return; +} + +void LegalizerInfo::aliasActionDefinitions(unsigned OpcodeTo, +                                           unsigned OpcodeFrom) { +  assert(OpcodeTo != OpcodeFrom && "Cannot alias to self"); +  assert(OpcodeTo >= FirstOp && OpcodeTo <= LastOp && "Unsupported opcode"); +  const unsigned OpcodeFromIdx = getOpcodeIdxForOpcode(OpcodeFrom); +  RulesForOpcode[OpcodeFromIdx].aliasTo(OpcodeTo); +} + +LegalizeActionStep +LegalizerInfo::getAction(const LegalityQuery &Query) const { +  LegalizeActionStep Step = getActionDefinitions(Query.Opcode).apply(Query); +  if (Step.Action != LegalizeAction::UseLegacyRules) { +    return Step; +  } + +  for (unsigned i = 0; i < Query.Types.size(); ++i) { +    auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]}); +    if (Action.first != Legal) { +      LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i +                        << " Action=" << (unsigned)Action.first << ", " +                        << Action.second << "\n"); +      return {Action.first, i, Action.second}; +    } else +      LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n"); +  } +  LLVM_DEBUG(dbgs() << ".. (legacy) Legal\n"); +  return {Legal, 0, LLT{}}; +} + +LegalizeActionStep  LegalizerInfo::getAction(const MachineInstr &MI,                           const MachineRegisterInfo &MRI) const { +  SmallVector<LLT, 2> Types;    SmallBitVector SeenTypes(8);    const MCOperandInfo *OpInfo = MI.getDesc().OpInfo;    // FIXME: probably we'll need to cache the results here somehow? 
@@ -205,16 +359,20 @@ LegalizerInfo::getAction(const MachineInstr &MI,      SeenTypes.set(TypeIdx);      LLT Ty = getTypeFromTypeIdx(MI, MRI, i, TypeIdx); -    auto Action = getAction({MI.getOpcode(), TypeIdx, Ty}); -    if (Action.first != Legal) -      return std::make_tuple(Action.first, TypeIdx, Action.second); +    Types.push_back(Ty);    } -  return std::make_tuple(Legal, 0, LLT{}); + +  SmallVector<LegalityQuery::MemDesc, 2> MemDescrs; +  for (const auto &MMO : MI.memoperands()) +    MemDescrs.push_back( +        {MMO->getSize() /* in bytes */ * 8, MMO->getOrdering()}); + +  return getAction({MI.getOpcode(), Types, MemDescrs});  }  bool LegalizerInfo::isLegal(const MachineInstr &MI,                              const MachineRegisterInfo &MRI) const { -  return std::get<0>(getAction(MI, MRI)) == Legal; +  return getAction(MI, MRI).Action == Legal;  }  bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, @@ -312,17 +470,18 @@ LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) {    case Unsupported:      return {Size, Unsupported};    case NotFound: +  case UseLegacyRules:      llvm_unreachable("NotFound");    }    llvm_unreachable("Action has an unknown enum value");  } -std::pair<LegalizerInfo::LegalizeAction, LLT> +std::pair<LegalizeAction, LLT>  LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const {    assert(Aspect.Type.isScalar() || Aspect.Type.isPointer());    if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)      return {NotFound, LLT()}; -  const unsigned OpcodeIdx = Aspect.Opcode - FirstOp; +  const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode);    if (Aspect.Type.isPointer() &&        AddrSpace2PointerActions[OpcodeIdx].find(Aspect.Type.getAddressSpace()) ==            AddrSpace2PointerActions[OpcodeIdx].end()) { @@ -346,14 +505,14 @@ LegalizerInfo::findScalarLegalAction(const InstrAspect &Aspect) const {                                                  SizeAndAction.first)};  } -std::pair<LegalizerInfo::LegalizeAction, LLT> +std::pair<LegalizeAction, LLT>  LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {    assert(Aspect.Type.isVector());    // First legalize the vector element size, then legalize the number of    // lanes in the vector.    if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)      return {NotFound, Aspect.Type}; -  const unsigned OpcodeIdx = Aspect.Opcode - FirstOp; +  const unsigned OpcodeIdx = getOpcodeIdxForOpcode(Aspect.Opcode);    const unsigned TypeIdx = Aspect.Idx;    if (TypeIdx >= ScalarInVectorActions[OpcodeIdx].size())      return {NotFound, Aspect.Type}; @@ -380,3 +539,53 @@ LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {            LLT::vector(NumElementsAndAction.first,                        IntermediateType.getScalarSizeInBits())};  } + +/// \pre Type indices of every opcode form a dense set starting from 0. +void LegalizerInfo::verify(const MCInstrInfo &MII) const { +#ifndef NDEBUG +  std::vector<unsigned> FailedOpcodes; +  for (unsigned Opcode = FirstOp; Opcode <= LastOp; ++Opcode) { +    const MCInstrDesc &MCID = MII.get(Opcode); +    const unsigned NumTypeIdxs = std::accumulate( +        MCID.opInfo_begin(), MCID.opInfo_end(), 0U, +        [](unsigned Acc, const MCOperandInfo &OpInfo) { +          return OpInfo.isGenericType() +                     ? 
std::max(OpInfo.getGenericTypeIndex() + 1U, Acc) +                     : Acc; +        }); +    LLVM_DEBUG(dbgs() << MII.getName(Opcode) << " (opcode " << Opcode +                      << "): " << NumTypeIdxs << " type ind" +                      << (NumTypeIdxs == 1 ? "ex" : "ices") << "\n"); +    const LegalizeRuleSet &RuleSet = getActionDefinitions(Opcode); +    if (!RuleSet.verifyTypeIdxsCoverage(NumTypeIdxs)) +      FailedOpcodes.push_back(Opcode); +  } +  if (!FailedOpcodes.empty()) { +    errs() << "The following opcodes have ill-defined legalization rules:"; +    for (unsigned Opcode : FailedOpcodes) +      errs() << " " << MII.getName(Opcode); +    errs() << "\n"; + +    report_fatal_error("ill-defined LegalizerInfo" +                       ", try -debug-only=legalizer-info for details"); +  } +#endif +} + +#ifndef NDEBUG +// FIXME: This should be in the MachineVerifier, but it can't use the +// LegalizerInfo as it's currently in the separate GlobalISel library. +// Note that the RegBankSelected property, already checked in the verifier, +// has the same layering problem, but we only use inline methods so we +// end up not needing to link against the GlobalISel library. +const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) { +  if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) { +    const MachineRegisterInfo &MRI = MF.getRegInfo(); +    for (const MachineBasicBlock &MBB : MF) +      for (const MachineInstr &MI : MBB) +        if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) +          return &MI; +  } +  return nullptr; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index 8e16470b6f90..52b340753a50 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -44,6 +44,11 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) {    }  } +void Localizer::getAnalysisUsage(AnalysisUsage &AU) const { +  getSelectionDAGFallbackAnalysisUsage(AU); +  MachineFunctionPass::getAnalysisUsage(AU); +} +  bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,                             MachineBasicBlock *&InsertMBB) {    MachineInstr &MIUse = *MOUse.getParent(); @@ -59,7 +64,7 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {            MachineFunctionProperties::Property::FailedISel))      return false; -  DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n');    init(MF); @@ -73,7 +78,7 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {      for (MachineInstr &MI : MBB) {        if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))          continue; -      DEBUG(dbgs() << "Should localize: " << MI); +      LLVM_DEBUG(dbgs() << "Should localize: " << MI);        assert(MI.getDesc().getNumDefs() == 1 &&               "More than one definition not supported yet");        unsigned Reg = MI.getOperand(0).getReg(); @@ -85,12 +90,12 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {          MachineOperand &MOUse = *MOIt++;          // Check if the use is already local.          
MachineBasicBlock *InsertMBB; -        DEBUG(MachineInstr &MIUse = *MOUse.getParent(); -              dbgs() << "Checking use: " << MIUse -                     << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); +        LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent(); +                   dbgs() << "Checking use: " << MIUse +                          << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');          if (isLocalUse(MOUse, MI, InsertMBB))            continue; -        DEBUG(dbgs() << "Fixing non-local use\n"); +        LLVM_DEBUG(dbgs() << "Fixing non-local use\n");          Changed = true;          auto MBBAndReg = std::make_pair(InsertMBB, Reg);          auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg); @@ -111,10 +116,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {            LocalizedMI->getOperand(0).setReg(NewReg);            NewVRegIt =                MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; -          DEBUG(dbgs() << "Inserted: " << *LocalizedMI); +          LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);          } -        DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) -                     << '\n'); +        LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) +                          << '\n');          // Update the user reg.          MOUse.setReg(NewVRegIt->second);        } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 475bb82e5b9c..9df931eb81b3 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -22,96 +22,103 @@  using namespace llvm; -void MachineIRBuilder::setMF(MachineFunction &MF) { -  this->MF = &MF; -  this->MBB = nullptr; -  this->MRI = &MF.getRegInfo(); -  this->TII = MF.getSubtarget().getInstrInfo(); -  this->DL = DebugLoc(); -  this->II = MachineBasicBlock::iterator(); -  this->InsertedInstr = nullptr; -} - -void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) { -  this->MBB = &MBB; -  this->II = MBB.end(); +void MachineIRBuilderBase::setMF(MachineFunction &MF) { +  State.MF = &MF; +  State.MBB = nullptr; +  State.MRI = &MF.getRegInfo(); +  State.TII = MF.getSubtarget().getInstrInfo(); +  State.DL = DebugLoc(); +  State.II = MachineBasicBlock::iterator(); +  State.InsertedInstr = nullptr; +} + +void MachineIRBuilderBase::setMBB(MachineBasicBlock &MBB) { +  State.MBB = &MBB; +  State.II = MBB.end();    assert(&getMF() == MBB.getParent() &&           "Basic block is in a different function");  } -void MachineIRBuilder::setInstr(MachineInstr &MI) { +void MachineIRBuilderBase::setInstr(MachineInstr &MI) {    assert(MI.getParent() && "Instruction is not part of a basic block");    setMBB(*MI.getParent()); -  this->II = MI.getIterator(); +  State.II = MI.getIterator();  } -void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB, -                                   MachineBasicBlock::iterator II) { +void MachineIRBuilderBase::setInsertPt(MachineBasicBlock &MBB, +                                       MachineBasicBlock::iterator II) {    assert(MBB.getParent() == &getMF() &&           "Basic block is in a different function"); -  this->MBB = &MBB; -  this->II = II; +  State.MBB = &MBB; +  State.II = II;  } -void MachineIRBuilder::recordInsertions( +void MachineIRBuilderBase::recordInsertion(MachineInstr *InsertedInstr) const { +  if (State.InsertedInstr) +    State.InsertedInstr(InsertedInstr); +} + 
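[Editor's note: a usage sketch for the insertion-recording hooks in this hunk (recordInsertion above, recordInsertions/stopRecordingInsertions just below), much as the Legalizer uses them to feed newly built instructions into its worklist; the collector variable is an illustrative assumption.]

    SmallVector<MachineInstr *, 8> NewInstrs; // hypothetical collector
    MIRBuilder.recordInsertions(
        [&](MachineInstr *MI) { NewInstrs.push_back(MI); });
    // ... build instructions through MIRBuilder here ...
    MIRBuilder.stopRecordingInsertions();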
+void MachineIRBuilderBase::recordInsertions(      std::function<void(MachineInstr *)> Inserted) { -  InsertedInstr = std::move(Inserted); +  State.InsertedInstr = std::move(Inserted);  } -void MachineIRBuilder::stopRecordingInsertions() { -  InsertedInstr = nullptr; +void MachineIRBuilderBase::stopRecordingInsertions() { +  State.InsertedInstr = nullptr;  }  //------------------------------------------------------------------------------  // Build instruction variants.  //------------------------------------------------------------------------------ -MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) { +MachineInstrBuilder MachineIRBuilderBase::buildInstr(unsigned Opcode) {    return insertInstr(buildInstrNoInsert(Opcode));  } -MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) { -  MachineInstrBuilder MIB = BuildMI(getMF(), DL, getTII().get(Opcode)); +MachineInstrBuilder MachineIRBuilderBase::buildInstrNoInsert(unsigned Opcode) { +  MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode));    return MIB;  } - -MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) { +MachineInstrBuilder MachineIRBuilderBase::insertInstr(MachineInstrBuilder MIB) {    getMBB().insert(getInsertPt(), MIB); -  if (InsertedInstr) -    InsertedInstr(MIB); +  recordInsertion(MIB);    return MIB;  }  MachineInstrBuilder -MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable, -                                      const MDNode *Expr) { +MachineIRBuilderBase::buildDirectDbgValue(unsigned Reg, const MDNode *Variable, +                                          const MDNode *Expr) {    assert(isa<DILocalVariable>(Variable) && "not a variable");    assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); -  assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && -         "Expected inlined-at fields to agree"); -  return insertInstr(BuildMI(getMF(), DL, getTII().get(TargetOpcode::DBG_VALUE), +  assert( +      cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && +      "Expected inlined-at fields to agree"); +  return insertInstr(BuildMI(getMF(), getDL(), +                             getTII().get(TargetOpcode::DBG_VALUE),                               /*IsIndirect*/ false, Reg, Variable, Expr));  } -MachineInstrBuilder -MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable, -                                        const MDNode *Expr) { +MachineInstrBuilder MachineIRBuilderBase::buildIndirectDbgValue( +    unsigned Reg, const MDNode *Variable, const MDNode *Expr) {    assert(isa<DILocalVariable>(Variable) && "not a variable");    assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); -  assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && -         "Expected inlined-at fields to agree"); -  return insertInstr(BuildMI(getMF(), DL, getTII().get(TargetOpcode::DBG_VALUE), +  assert( +      cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && +      "Expected inlined-at fields to agree"); +  return insertInstr(BuildMI(getMF(), getDL(), +                             getTII().get(TargetOpcode::DBG_VALUE),                               /*IsIndirect*/ true, Reg, Variable, Expr));  } -MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, -                                                      const MDNode *Variable, -                                                      const MDNode *Expr) { 
+MachineInstrBuilder +MachineIRBuilderBase::buildFIDbgValue(int FI, const MDNode *Variable, +                                      const MDNode *Expr) {    assert(isa<DILocalVariable>(Variable) && "not a variable");    assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); -  assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && -         "Expected inlined-at fields to agree"); +  assert( +      cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && +      "Expected inlined-at fields to agree");    return buildInstr(TargetOpcode::DBG_VALUE)        .addFrameIndex(FI)        .addImm(0) @@ -119,13 +126,13 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,        .addMetadata(Expr);  } -MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, -                                                         const MDNode *Variable, -                                                         const MDNode *Expr) { +MachineInstrBuilder MachineIRBuilderBase::buildConstDbgValue( +    const Constant &C, const MDNode *Variable, const MDNode *Expr) {    assert(isa<DILocalVariable>(Variable) && "not a variable");    assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); -  assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && -         "Expected inlined-at fields to agree"); +  assert( +      cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && +      "Expected inlined-at fields to agree");    auto MIB = buildInstr(TargetOpcode::DBG_VALUE);    if (auto *CI = dyn_cast<ConstantInt>(&C)) {      if (CI->getBitWidth() > 64) @@ -142,17 +149,18 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,    return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);  } -MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) { -  assert(MRI->getType(Res).isPointer() && "invalid operand type"); +MachineInstrBuilder MachineIRBuilderBase::buildFrameIndex(unsigned Res, +                                                          int Idx) { +  assert(getMRI()->getType(Res).isPointer() && "invalid operand type");    return buildInstr(TargetOpcode::G_FRAME_INDEX)        .addDef(Res)        .addFrameIndex(Idx);  } -MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res, -                                                       const GlobalValue *GV) { -  assert(MRI->getType(Res).isPointer() && "invalid operand type"); -  assert(MRI->getType(Res).getAddressSpace() == +MachineInstrBuilder +MachineIRBuilderBase::buildGlobalValue(unsigned Res, const GlobalValue *GV) { +  assert(getMRI()->getType(Res).isPointer() && "invalid operand type"); +  assert(getMRI()->getType(Res).getAddressSpace() ==               GV->getType()->getAddressSpace() &&           "address space mismatch"); @@ -161,29 +169,20 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,        .addGlobalAddress(GV);  } -MachineInstrBuilder MachineIRBuilder::buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0, -                                               unsigned Op1) { -  assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && +void MachineIRBuilderBase::validateBinaryOp(unsigned Res, unsigned Op0, +                                            unsigned Op1) { +  assert((getMRI()->getType(Res).isScalar() || +          getMRI()->getType(Res).isVector()) &&           "invalid operand type"); -  assert(MRI->getType(Res) == MRI->getType(Op0) && - 
        MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - -  return buildInstr(Opcode) -      .addDef(Res) -      .addUse(Op0) -      .addUse(Op1); -} - -MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0, -                                               unsigned Op1) { -  return buildBinaryOp(TargetOpcode::G_ADD, Res, Op0, Op1); +  assert(getMRI()->getType(Res) == getMRI()->getType(Op0) && +         getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch");  } -MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, -                                               unsigned Op1) { -  assert(MRI->getType(Res).isPointer() && -         MRI->getType(Res) == MRI->getType(Op0) && "type mismatch"); -  assert(MRI->getType(Op1).isScalar()  && "invalid offset type"); +MachineInstrBuilder MachineIRBuilderBase::buildGEP(unsigned Res, unsigned Op0, +                                                   unsigned Op1) { +  assert(getMRI()->getType(Res).isPointer() && +         getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch"); +  assert(getMRI()->getType(Op1).isScalar() && "invalid offset type");    return buildInstr(TargetOpcode::G_GEP)        .addDef(Res) @@ -192,8 +191,8 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,  }  Optional<MachineInstrBuilder> -MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0, -                                 const LLT &ValueTy, uint64_t Value) { +MachineIRBuilderBase::materializeGEP(unsigned &Res, unsigned Op0, +                                     const LLT &ValueTy, uint64_t Value) {    assert(Res == 0 && "Res is a result argument");    assert(ValueTy.isScalar()  && "invalid offset type"); @@ -202,17 +201,18 @@ MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,      return None;    } -  Res = MRI->createGenericVirtualRegister(MRI->getType(Op0)); -  unsigned TmpReg = MRI->createGenericVirtualRegister(ValueTy); +  Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0)); +  unsigned TmpReg = getMRI()->createGenericVirtualRegister(ValueTy);    buildConstant(TmpReg, Value);    return buildGEP(Res, Op0, TmpReg);  } -MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, -                                                   uint32_t NumBits) { -  assert(MRI->getType(Res).isPointer() && -         MRI->getType(Res) == MRI->getType(Op0) && "type mismatch"); +MachineInstrBuilder MachineIRBuilderBase::buildPtrMask(unsigned Res, +                                                       unsigned Op0, +                                                       uint32_t NumBits) { +  assert(getMRI()->getType(Res).isPointer() && +         getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");    return buildInstr(TargetOpcode::G_PTR_MASK)        .addDef(Res) @@ -220,92 +220,88 @@ MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,        .addImm(NumBits);  } -MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0, -                                               unsigned Op1) { -  return buildBinaryOp(TargetOpcode::G_SUB, Res, Op0, Op1); -} - -MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0, -                                               unsigned Op1) { -  return buildBinaryOp(TargetOpcode::G_MUL, Res, Op0, Op1); -} - -MachineInstrBuilder MachineIRBuilder::buildAnd(unsigned Res, unsigned Op0, -                                               
unsigned Op1) { -  return buildBinaryOp(TargetOpcode::G_AND, Res, Op0, Op1); -} - -MachineInstrBuilder MachineIRBuilder::buildOr(unsigned Res, unsigned Op0, -                                              unsigned Op1) { -  return buildBinaryOp(TargetOpcode::G_OR, Res, Op0, Op1); -} - -MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { +MachineInstrBuilder MachineIRBuilderBase::buildBr(MachineBasicBlock &Dest) {    return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);  } -MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) { -  assert(MRI->getType(Tgt).isPointer() && "invalid branch destination"); +MachineInstrBuilder MachineIRBuilderBase::buildBrIndirect(unsigned Tgt) { +  assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination");    return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);  } -MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) { -  assert(MRI->getType(Res) == LLT() || MRI->getType(Op) == LLT() || -         MRI->getType(Res) == MRI->getType(Op)); +MachineInstrBuilder MachineIRBuilderBase::buildCopy(unsigned Res, unsigned Op) { +  assert(getMRI()->getType(Res) == LLT() || getMRI()->getType(Op) == LLT() || +         getMRI()->getType(Res) == getMRI()->getType(Op));    return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);  } -MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, -                                                    const ConstantInt &Val) { -  LLT Ty = MRI->getType(Res); +MachineInstrBuilder +MachineIRBuilderBase::buildConstant(unsigned Res, const ConstantInt &Val) { +  LLT Ty = getMRI()->getType(Res);    assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type");    const ConstantInt *NewVal = &Val;    if (Ty.getSizeInBits() != Val.getBitWidth()) -    NewVal = ConstantInt::get(MF->getFunction().getContext(), +    NewVal = ConstantInt::get(getMF().getFunction().getContext(),                                Val.getValue().sextOrTrunc(Ty.getSizeInBits()));    return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal);  } -MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, -                                                    int64_t Val) { -  auto IntN = IntegerType::get(MF->getFunction().getContext(), -                               MRI->getType(Res).getSizeInBits()); +MachineInstrBuilder MachineIRBuilderBase::buildConstant(unsigned Res, +                                                        int64_t Val) { +  auto IntN = IntegerType::get(getMF().getFunction().getContext(), +                               getMRI()->getType(Res).getSizeInBits());    ConstantInt *CI = ConstantInt::get(IntN, Val, true);    return buildConstant(Res, *CI);  } -MachineInstrBuilder MachineIRBuilder::buildFConstant(unsigned Res, -                                                     const ConstantFP &Val) { -  assert(MRI->getType(Res).isScalar() && "invalid operand type"); +MachineInstrBuilder +MachineIRBuilderBase::buildFConstant(unsigned Res, const ConstantFP &Val) { +  assert(getMRI()->getType(Res).isScalar() && "invalid operand type");    return buildInstr(TargetOpcode::G_FCONSTANT).addDef(Res).addFPImm(&Val);  } -MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst, -                                                  MachineBasicBlock &Dest) { -  assert(MRI->getType(Tst).isScalar() && "invalid operand type"); +MachineInstrBuilder MachineIRBuilderBase::buildFConstant(unsigned Res, +                                                         
double Val) { +  LLT DstTy = getMRI()->getType(Res); +  auto &Ctx = getMF().getFunction().getContext(); +  auto *CFP = +      ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getSizeInBits())); +  return buildFConstant(Res, *CFP); +} + +MachineInstrBuilder MachineIRBuilderBase::buildBrCond(unsigned Tst, +                                                      MachineBasicBlock &Dest) { +  assert(getMRI()->getType(Tst).isScalar() && "invalid operand type");    return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest);  } -MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr, -                                                MachineMemOperand &MMO) { -  assert(MRI->getType(Res).isValid() && "invalid operand type"); -  assert(MRI->getType(Addr).isPointer() && "invalid operand type"); +MachineInstrBuilder MachineIRBuilderBase::buildLoad(unsigned Res, unsigned Addr, +                                                    MachineMemOperand &MMO) { +  return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildLoadInstr(unsigned Opcode, unsigned Res, +                                     unsigned Addr, MachineMemOperand &MMO) { +  assert(getMRI()->getType(Res).isValid() && "invalid operand type"); +  assert(getMRI()->getType(Addr).isPointer() && "invalid operand type"); -  return buildInstr(TargetOpcode::G_LOAD) +  return buildInstr(Opcode)        .addDef(Res)        .addUse(Addr)        .addMemOperand(&MMO);  } -MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr, -                                                 MachineMemOperand &MMO) { -  assert(MRI->getType(Val).isValid() && "invalid operand type"); -  assert(MRI->getType(Addr).isPointer() && "invalid operand type"); +MachineInstrBuilder MachineIRBuilderBase::buildStore(unsigned Val, +                                                     unsigned Addr, +                                                     MachineMemOperand &MMO) { +  assert(getMRI()->getType(Val).isValid() && "invalid operand type"); +  assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");    return buildInstr(TargetOpcode::G_STORE)        .addUse(Val) @@ -313,15 +309,16 @@ MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr,        .addMemOperand(&MMO);  } -MachineInstrBuilder MachineIRBuilder::buildUAdde(unsigned Res, -                                                 unsigned CarryOut, -                                                 unsigned Op0, unsigned Op1, -                                                 unsigned CarryIn) { -  assert(MRI->getType(Res).isScalar() && "invalid operand type"); -  assert(MRI->getType(Res) == MRI->getType(Op0) && -         MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); -  assert(MRI->getType(CarryOut).isScalar() && "invalid operand type"); -  assert(MRI->getType(CarryOut) == MRI->getType(CarryIn) && "type mismatch"); +MachineInstrBuilder MachineIRBuilderBase::buildUAdde(unsigned Res, +                                                     unsigned CarryOut, +                                                     unsigned Op0, unsigned Op1, +                                                     unsigned CarryIn) { +  assert(getMRI()->getType(Res).isScalar() && "invalid operand type"); +  assert(getMRI()->getType(Res) == getMRI()->getType(Op0) && +         getMRI()->getType(Res) == getMRI()->getType(Op1) && "type mismatch"); +  assert(getMRI()->getType(CarryOut).isScalar() && "invalid 
operand type"); +  assert(getMRI()->getType(CarryOut) == getMRI()->getType(CarryIn) && +         "type mismatch");    return buildInstr(TargetOpcode::G_UADDE)        .addDef(Res) @@ -331,58 +328,64 @@ MachineInstrBuilder MachineIRBuilder::buildUAdde(unsigned Res,        .addUse(CarryIn);  } -MachineInstrBuilder MachineIRBuilder::buildAnyExt(unsigned Res, unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildAnyExt(unsigned Res, +                                                      unsigned Op) {    validateTruncExt(Res, Op, true);    return buildInstr(TargetOpcode::G_ANYEXT).addDef(Res).addUse(Op);  } -MachineInstrBuilder MachineIRBuilder::buildSExt(unsigned Res, unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildSExt(unsigned Res, unsigned Op) {    validateTruncExt(Res, Op, true);    return buildInstr(TargetOpcode::G_SEXT).addDef(Res).addUse(Op);  } -MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildZExt(unsigned Res, unsigned Op) {    validateTruncExt(Res, Op, true);    return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op);  } -MachineInstrBuilder -MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc, unsigned Res, unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildExtOrTrunc(unsigned ExtOpc, +                                                          unsigned Res, +                                                          unsigned Op) {    assert((TargetOpcode::G_ANYEXT == ExtOpc || TargetOpcode::G_ZEXT == ExtOpc ||            TargetOpcode::G_SEXT == ExtOpc) &&           "Expecting Extending Opc"); -  assert(MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()); -  assert(MRI->getType(Res).isScalar() == MRI->getType(Op).isScalar()); +  assert(getMRI()->getType(Res).isScalar() || +         getMRI()->getType(Res).isVector()); +  assert(getMRI()->getType(Res).isScalar() == getMRI()->getType(Op).isScalar());    unsigned Opcode = TargetOpcode::COPY; -  if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits()) +  if (getMRI()->getType(Res).getSizeInBits() > +      getMRI()->getType(Op).getSizeInBits())      Opcode = ExtOpc; -  else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits()) +  else if (getMRI()->getType(Res).getSizeInBits() < +           getMRI()->getType(Op).getSizeInBits())      Opcode = TargetOpcode::G_TRUNC;    else -    assert(MRI->getType(Res) == MRI->getType(Op)); +    assert(getMRI()->getType(Res) == getMRI()->getType(Op));    return buildInstr(Opcode).addDef(Res).addUse(Op);  } -MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res, -                                                       unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildSExtOrTrunc(unsigned Res, +                                                           unsigned Op) {    return buildExtOrTrunc(TargetOpcode::G_SEXT, Res, Op);  } -MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res, -                                                       unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildZExtOrTrunc(unsigned Res, +                                                           unsigned Op) {    return buildExtOrTrunc(TargetOpcode::G_ZEXT, Res, Op);  } -MachineInstrBuilder MachineIRBuilder::buildAnyExtOrTrunc(unsigned Res, -                                                         unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildAnyExtOrTrunc(unsigned Res, +                                   
                          unsigned Op) {    return buildExtOrTrunc(TargetOpcode::G_ANYEXT, Res, Op);  } -MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) { -  LLT SrcTy = MRI->getType(Src); -  LLT DstTy = MRI->getType(Dst); +MachineInstrBuilder MachineIRBuilderBase::buildCast(unsigned Dst, +                                                    unsigned Src) { +  LLT SrcTy = getMRI()->getType(Src); +  LLT DstTy = getMRI()->getType(Dst);    if (SrcTy == DstTy)      return buildCopy(Dst, Src); @@ -399,17 +402,18 @@ MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) {    return buildInstr(Opcode).addDef(Dst).addUse(Src);  } -MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src, -                                                   uint64_t Index) { +MachineInstrBuilder +MachineIRBuilderBase::buildExtract(unsigned Res, unsigned Src, uint64_t Index) {  #ifndef NDEBUG -  assert(MRI->getType(Src).isValid() && "invalid operand type"); -  assert(MRI->getType(Res).isValid() && "invalid operand type"); -  assert(Index + MRI->getType(Res).getSizeInBits() <= -             MRI->getType(Src).getSizeInBits() && +  assert(getMRI()->getType(Src).isValid() && "invalid operand type"); +  assert(getMRI()->getType(Res).isValid() && "invalid operand type"); +  assert(Index + getMRI()->getType(Res).getSizeInBits() <= +             getMRI()->getType(Src).getSizeInBits() &&           "extracting off end of register");  #endif -  if (MRI->getType(Res).getSizeInBits() == MRI->getType(Src).getSizeInBits()) { +  if (getMRI()->getType(Res).getSizeInBits() == +      getMRI()->getType(Src).getSizeInBits()) {      assert(Index == 0 && "insertion past the end of a register");      return buildCast(Res, Src);    } @@ -420,25 +424,25 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,        .addImm(Index);  } -void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, -                                     ArrayRef<uint64_t> Indices) { +void MachineIRBuilderBase::buildSequence(unsigned Res, ArrayRef<unsigned> Ops, +                                         ArrayRef<uint64_t> Indices) {  #ifndef NDEBUG    assert(Ops.size() == Indices.size() && "incompatible args");    assert(!Ops.empty() && "invalid trivial sequence");    assert(std::is_sorted(Indices.begin(), Indices.end()) &&           "sequence offsets must be in ascending order"); -  assert(MRI->getType(Res).isValid() && "invalid operand type"); +  assert(getMRI()->getType(Res).isValid() && "invalid operand type");    for (auto Op : Ops) -    assert(MRI->getType(Op).isValid() && "invalid operand type"); +    assert(getMRI()->getType(Op).isValid() && "invalid operand type");  #endif -  LLT ResTy = MRI->getType(Res); -  LLT OpTy = MRI->getType(Ops[0]); +  LLT ResTy = getMRI()->getType(Res); +  LLT OpTy = getMRI()->getType(Ops[0]);    unsigned OpSize = OpTy.getSizeInBits();    bool MaybeMerge = true;    for (unsigned i = 0; i < Ops.size(); ++i) { -    if (MRI->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) { +    if (getMRI()->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) {        MaybeMerge = false;        break;      } @@ -449,31 +453,32 @@ void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,      return;    } -  unsigned ResIn = MRI->createGenericVirtualRegister(ResTy); +  unsigned ResIn = getMRI()->createGenericVirtualRegister(ResTy);    buildUndef(ResIn);    for (unsigned i = 0; i < Ops.size(); ++i) { -    unsigned ResOut 
= -        i + 1 == Ops.size() ? Res : MRI->createGenericVirtualRegister(ResTy); +    unsigned ResOut = i + 1 == Ops.size() +                          ? Res +                          : getMRI()->createGenericVirtualRegister(ResTy);      buildInsert(ResOut, ResIn, Ops[i], Indices[i]);      ResIn = ResOut;    }  } -MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) { +MachineInstrBuilder MachineIRBuilderBase::buildUndef(unsigned Res) {    return buildInstr(TargetOpcode::G_IMPLICIT_DEF).addDef(Res);  } -MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res, -                                                 ArrayRef<unsigned> Ops) { +MachineInstrBuilder MachineIRBuilderBase::buildMerge(unsigned Res, +                                                     ArrayRef<unsigned> Ops) {  #ifndef NDEBUG    assert(!Ops.empty() && "invalid trivial sequence"); -  LLT Ty = MRI->getType(Ops[0]); +  LLT Ty = getMRI()->getType(Ops[0]);    for (auto Reg : Ops) -    assert(MRI->getType(Reg) == Ty && "type mismatch in input list"); -  assert(Ops.size() * MRI->getType(Ops[0]).getSizeInBits() == -             MRI->getType(Res).getSizeInBits() && +    assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list"); +  assert(Ops.size() * getMRI()->getType(Ops[0]).getSizeInBits() == +             getMRI()->getType(Res).getSizeInBits() &&           "input operands do not cover output register");  #endif @@ -487,16 +492,16 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res,    return MIB;  } -MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res, -                                                   unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildUnmerge(ArrayRef<unsigned> Res, +                                                       unsigned Op) {  #ifndef NDEBUG    assert(!Res.empty() && "invalid trivial sequence"); -  LLT Ty = MRI->getType(Res[0]); +  LLT Ty = getMRI()->getType(Res[0]);    for (auto Reg : Res) -    assert(MRI->getType(Reg) == Ty && "type mismatch in input list"); -  assert(Res.size() * MRI->getType(Res[0]).getSizeInBits() == -             MRI->getType(Op).getSizeInBits() && +    assert(getMRI()->getType(Reg) == Ty && "type mismatch in input list"); +  assert(Res.size() * getMRI()->getType(Res[0]).getSizeInBits() == +             getMRI()->getType(Op).getSizeInBits() &&           "input operands do not cover output register");  #endif @@ -507,13 +512,15 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,    return MIB;  } -MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src, -                                                  unsigned Op, unsigned Index) { -  assert(Index + MRI->getType(Op).getSizeInBits() <= -             MRI->getType(Res).getSizeInBits() && +MachineInstrBuilder MachineIRBuilderBase::buildInsert(unsigned Res, +                                                      unsigned Src, unsigned Op, +                                                      unsigned Index) { +  assert(Index + getMRI()->getType(Op).getSizeInBits() <= +             getMRI()->getType(Res).getSizeInBits() &&           "insertion past the end of a register"); -  if (MRI->getType(Res).getSizeInBits() == MRI->getType(Op).getSizeInBits()) { +  if (getMRI()->getType(Res).getSizeInBits() == +      getMRI()->getType(Op).getSizeInBits()) {      return buildCast(Res, Op);    } @@ -524,9 +531,9 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,        .addImm(Index);  } 
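[Editor's note: an illustrative expansion of the buildSequence fallback earlier in this hunk, which applies when the pieces cannot form a single G_MERGE_VALUES, e.g. because their types differ. Register names and types are assumed:]

    // buildSequence(Res /*s64*/, {A /*s32*/, B /*s16*/}, {0, 32}) emits:
    //   %u(s64)  = G_IMPLICIT_DEF
    //   %t0(s64) = G_INSERT %u,  A(s32), 0
    //   Res(s64) = G_INSERT %t0, B(s16), 32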
-MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID, -                                                     unsigned Res, -                                                     bool HasSideEffects) { +MachineInstrBuilder MachineIRBuilderBase::buildIntrinsic(Intrinsic::ID ID, +                                                         unsigned Res, +                                                         bool HasSideEffects) {    auto MIB =        buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS                                  : TargetOpcode::G_INTRINSIC); @@ -536,28 +543,30 @@ MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,    return MIB;  } -MachineInstrBuilder MachineIRBuilder::buildTrunc(unsigned Res, unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildTrunc(unsigned Res, +                                                     unsigned Op) {    validateTruncExt(Res, Op, false);    return buildInstr(TargetOpcode::G_TRUNC).addDef(Res).addUse(Op);  } -MachineInstrBuilder MachineIRBuilder::buildFPTrunc(unsigned Res, unsigned Op) { +MachineInstrBuilder MachineIRBuilderBase::buildFPTrunc(unsigned Res, +                                                       unsigned Op) {    validateTruncExt(Res, Op, false);    return buildInstr(TargetOpcode::G_FPTRUNC).addDef(Res).addUse(Op);  } -MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, -                                                unsigned Res, unsigned Op0, -                                                unsigned Op1) { +MachineInstrBuilder MachineIRBuilderBase::buildICmp(CmpInst::Predicate Pred, +                                                    unsigned Res, unsigned Op0, +                                                    unsigned Op1) {  #ifndef NDEBUG -  assert(MRI->getType(Op0) == MRI->getType(Op0) && "type mismatch"); +  assert(getMRI()->getType(Op0) == getMRI()->getType(Op0) && "type mismatch");    assert(CmpInst::isIntPredicate(Pred) && "invalid predicate"); -  if (MRI->getType(Op0).isScalar() || MRI->getType(Op0).isPointer()) -    assert(MRI->getType(Res).isScalar() && "type mismatch"); +  if (getMRI()->getType(Op0).isScalar() || getMRI()->getType(Op0).isPointer()) +    assert(getMRI()->getType(Res).isScalar() && "type mismatch");    else -    assert(MRI->getType(Res).isVector() && -           MRI->getType(Res).getNumElements() == -               MRI->getType(Op0).getNumElements() && +    assert(getMRI()->getType(Res).isVector() && +           getMRI()->getType(Res).getNumElements() == +               getMRI()->getType(Op0).getNumElements() &&             "type mismatch");  #endif @@ -568,20 +577,21 @@ MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,        .addUse(Op1);  } -MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, -                                                unsigned Res, unsigned Op0, -                                                unsigned Op1) { +MachineInstrBuilder MachineIRBuilderBase::buildFCmp(CmpInst::Predicate Pred, +                                                    unsigned Res, unsigned Op0, +                                                    unsigned Op1) {  #ifndef NDEBUG -  assert((MRI->getType(Op0).isScalar() || MRI->getType(Op0).isVector()) && +  assert((getMRI()->getType(Op0).isScalar() || +          getMRI()->getType(Op0).isVector()) &&           "invalid operand type"); -  assert(MRI->getType(Op0) == MRI->getType(Op1) && "type mismatch"); +  
assert(getMRI()->getType(Op0) == getMRI()->getType(Op1) && "type mismatch");    assert(CmpInst::isFPPredicate(Pred) && "invalid predicate"); -  if (MRI->getType(Op0).isScalar()) -    assert(MRI->getType(Res).isScalar() && "type mismatch"); +  if (getMRI()->getType(Op0).isScalar()) +    assert(getMRI()->getType(Res).isScalar() && "type mismatch");    else -    assert(MRI->getType(Res).isVector() && -           MRI->getType(Res).getNumElements() == -               MRI->getType(Op0).getNumElements() && +    assert(getMRI()->getType(Res).isVector() && +           getMRI()->getType(Res).getNumElements() == +               getMRI()->getType(Op0).getNumElements() &&             "type mismatch");  #endif @@ -592,21 +602,23 @@ MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,        .addUse(Op1);  } -MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst, -                                                  unsigned Op0, unsigned Op1) { +MachineInstrBuilder MachineIRBuilderBase::buildSelect(unsigned Res, +                                                      unsigned Tst, +                                                      unsigned Op0, +                                                      unsigned Op1) {  #ifndef NDEBUG -  LLT ResTy = MRI->getType(Res); +  LLT ResTy = getMRI()->getType(Res);    assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&           "invalid operand type"); -  assert(ResTy == MRI->getType(Op0) && ResTy == MRI->getType(Op1) && +  assert(ResTy == getMRI()->getType(Op0) && ResTy == getMRI()->getType(Op1) &&           "type mismatch");    if (ResTy.isScalar() || ResTy.isPointer()) -    assert(MRI->getType(Tst).isScalar() && "type mismatch"); +    assert(getMRI()->getType(Tst).isScalar() && "type mismatch");    else -    assert((MRI->getType(Tst).isScalar() || -            (MRI->getType(Tst).isVector() && -             MRI->getType(Tst).getNumElements() == -                 MRI->getType(Op0).getNumElements())) && +    assert((getMRI()->getType(Tst).isScalar() || +            (getMRI()->getType(Tst).isVector() && +             getMRI()->getType(Tst).getNumElements() == +                 getMRI()->getType(Op0).getNumElements())) &&             "type mismatch");  #endif @@ -617,15 +629,14 @@ MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,        .addUse(Op1);  } -MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res, -                                                               unsigned Val, -                                                               unsigned Elt, -                                                               unsigned Idx) { +MachineInstrBuilder +MachineIRBuilderBase::buildInsertVectorElement(unsigned Res, unsigned Val, +                                               unsigned Elt, unsigned Idx) {  #ifndef NDEBUG -  LLT ResTy = MRI->getType(Res); -  LLT ValTy = MRI->getType(Val); -  LLT EltTy = MRI->getType(Elt); -  LLT IdxTy = MRI->getType(Idx); +  LLT ResTy = getMRI()->getType(Res); +  LLT ValTy = getMRI()->getType(Val); +  LLT EltTy = getMRI()->getType(Elt); +  LLT IdxTy = getMRI()->getType(Idx);    assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type");    assert(IdxTy.isScalar() && "invalid operand type");    assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch"); @@ -639,13 +650,13 @@ MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res,        .addUse(Idx);  } 
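One pre-existing wart carries over unchanged above: the first assert in buildICmp compares getMRI()->getType(Op0) against itself, so the intended Op0/Op1 type check is a no-op, while buildFCmp does compare Op0 with Op1. As a usage sketch (hypothetical names, assuming s32 vregs A, B, X and Y are already defined), emitting (A == B) ? X : Y with these builders looks like:

    unsigned Cond = MRI.createGenericVirtualRegister(LLT::scalar(1));
    unsigned Res = MRI.createGenericVirtualRegister(LLT::scalar(32));
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Cond, A, B); // G_ICMP, s1 result
    MIRBuilder.buildSelect(Res, Cond, X, Y);            // G_SELECT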
-MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res, -                                                                unsigned Val, -                                                                unsigned Idx) { +MachineInstrBuilder +MachineIRBuilderBase::buildExtractVectorElement(unsigned Res, unsigned Val, +                                                unsigned Idx) {  #ifndef NDEBUG -  LLT ResTy = MRI->getType(Res); -  LLT ValTy = MRI->getType(Val); -  LLT IdxTy = MRI->getType(Idx); +  LLT ResTy = getMRI()->getType(Res); +  LLT ValTy = getMRI()->getType(Val); +  LLT IdxTy = getMRI()->getType(Idx);    assert(ValTy.isVector() && "invalid operand type");    assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type");    assert(IdxTy.isScalar() && "invalid operand type"); @@ -658,15 +669,42 @@ MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res,        .addUse(Idx);  } +MachineInstrBuilder MachineIRBuilderBase::buildAtomicCmpXchgWithSuccess( +    unsigned OldValRes, unsigned SuccessRes, unsigned Addr, unsigned CmpVal, +    unsigned NewVal, MachineMemOperand &MMO) { +#ifndef NDEBUG +  LLT OldValResTy = getMRI()->getType(OldValRes); +  LLT SuccessResTy = getMRI()->getType(SuccessRes); +  LLT AddrTy = getMRI()->getType(Addr); +  LLT CmpValTy = getMRI()->getType(CmpVal); +  LLT NewValTy = getMRI()->getType(NewVal); +  assert(OldValResTy.isScalar() && "invalid operand type"); +  assert(SuccessResTy.isScalar() && "invalid operand type"); +  assert(AddrTy.isPointer() && "invalid operand type"); +  assert(CmpValTy.isValid() && "invalid operand type"); +  assert(NewValTy.isValid() && "invalid operand type"); +  assert(OldValResTy == CmpValTy && "type mismatch"); +  assert(OldValResTy == NewValTy && "type mismatch"); +#endif + +  return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) +      .addDef(OldValRes) +      .addDef(SuccessRes) +      .addUse(Addr) +      .addUse(CmpVal) +      .addUse(NewVal) +      .addMemOperand(&MMO); +} +  MachineInstrBuilder -MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, -                                     unsigned CmpVal, unsigned NewVal, -                                     MachineMemOperand &MMO) { +MachineIRBuilderBase::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, +                                         unsigned CmpVal, unsigned NewVal, +                                         MachineMemOperand &MMO) {  #ifndef NDEBUG -  LLT OldValResTy = MRI->getType(OldValRes); -  LLT AddrTy = MRI->getType(Addr); -  LLT CmpValTy = MRI->getType(CmpVal); -  LLT NewValTy = MRI->getType(NewVal); +  LLT OldValResTy = getMRI()->getType(OldValRes); +  LLT AddrTy = getMRI()->getType(Addr); +  LLT CmpValTy = getMRI()->getType(CmpVal); +  LLT NewValTy = getMRI()->getType(NewVal);    assert(OldValResTy.isScalar() && "invalid operand type");    assert(AddrTy.isPointer() && "invalid operand type");    assert(CmpValTy.isValid() && "invalid operand type"); @@ -683,14 +721,102 @@ MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,        .addMemOperand(&MMO);  } -void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src, -                                        bool IsExtend) { +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMW(unsigned Opcode, unsigned OldValRes, +                                     unsigned Addr, unsigned Val, +                                     MachineMemOperand &MMO) { +#ifndef NDEBUG +  LLT OldValResTy = 
getMRI()->getType(OldValRes); +  LLT AddrTy = getMRI()->getType(Addr); +  LLT ValTy = getMRI()->getType(Val); +  assert(OldValResTy.isScalar() && "invalid operand type"); +  assert(AddrTy.isPointer() && "invalid operand type"); +  assert(ValTy.isValid() && "invalid operand type"); +  assert(OldValResTy == ValTy && "type mismatch"); +#endif + +  return buildInstr(Opcode) +      .addDef(OldValRes) +      .addUse(Addr) +      .addUse(Val) +      .addMemOperand(&MMO); +} + +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr, +                                         unsigned Val, MachineMemOperand &MMO) { +  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val, +                        MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr, +                                        unsigned Val, MachineMemOperand &MMO) { +  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val, +                        MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr, +                                        unsigned Val, MachineMemOperand &MMO) { +  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val, +                        MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr, +                                        unsigned Val, MachineMemOperand &MMO) { +  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val, +                        MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr, +                                         unsigned Val, MachineMemOperand &MMO) { +  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val, +                        MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWOr(unsigned OldValRes, unsigned Addr, +                                       unsigned Val, MachineMemOperand &MMO) { +  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, Val, +                        MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr, +                                        unsigned Val, MachineMemOperand &MMO) { +  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val, +                        MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr, +                                        unsigned Val, MachineMemOperand &MMO) { +  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val, +                        MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr, +                                        unsigned Val, MachineMemOperand &MMO) { +  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val, +                        MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr, +                                         unsigned Val, MachineMemOperand &MMO) { +  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val, +                        MMO); +} +MachineInstrBuilder +MachineIRBuilderBase::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr, +                                         unsigned Val, MachineMemOperand 
&MMO) { +  return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val, +                        MMO); +} + +void MachineIRBuilderBase::validateTruncExt(unsigned Dst, unsigned Src, +                                            bool IsExtend) {  #ifndef NDEBUG -  LLT SrcTy = MRI->getType(Src); -  LLT DstTy = MRI->getType(Dst); +  LLT SrcTy = getMRI()->getType(Src); +  LLT DstTy = getMRI()->getType(Dst);    if (DstTy.isVector()) { -    assert(SrcTy.isVector() && "mismatched cast between vecot and non-vector"); +    assert(SrcTy.isVector() && "mismatched cast between vector and non-vector");      assert(SrcTy.getNumElements() == DstTy.getNumElements() &&             "different number of elements in a trunc/ext");    } else diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 006c9ea23034..9e2d48d1dc42 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -30,6 +30,7 @@  #include "llvm/CodeGen/TargetPassConfig.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/Attributes.h"  #include "llvm/IR/Function.h"  #include "llvm/Pass.h" @@ -75,7 +76,7 @@ RegBankSelect::RegBankSelect(Mode RunningMode)    if (RegBankSelectMode.getNumOccurrences() != 0) {      OptMode = RegBankSelectMode;      if (RegBankSelectMode != RunningMode) -      DEBUG(dbgs() << "RegBankSelect mode overrided by command line\n"); +      LLVM_DEBUG(dbgs() << "RegBankSelect mode overrided by command line\n");    }  } @@ -104,6 +105,7 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {      AU.addRequired<MachineBranchProbabilityInfo>();    }    AU.addRequired<TargetPassConfig>(); +  getSelectionDAGFallbackAnalysisUsage(AU);    MachineFunctionPass::getAnalysisUsage(AU);  } @@ -122,11 +124,11 @@ bool RegBankSelect::assignmentMatch(    // Reg is free of assignment, a simple assignment will make the    // register bank to match.    OnlyAssign = CurRegBank == nullptr; -  DEBUG(dbgs() << "Does assignment already match: "; -        if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none"; -        dbgs() << " against "; -        assert(DesiredRegBrank && "The mapping must be valid"); -        dbgs() << *DesiredRegBrank << '\n';); +  LLVM_DEBUG(dbgs() << "Does assignment already match: "; +             if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none"; +             dbgs() << " against "; +             assert(DesiredRegBrank && "The mapping must be valid"); +             dbgs() << *DesiredRegBrank << '\n';);    return CurRegBank == DesiredRegBrank;  } @@ -159,8 +161,8 @@ bool RegBankSelect::repairReg(    // same types because the type is a placeholder when this function is called.    MachineInstr *MI =        MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src); -  DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst) -               << '\n'); +  LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst) +                    << '\n');    // TODO:    // Check if MI is legal. if not, we need to legalize all the    // instructions we are going to insert. 
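From here on, most hunks in RegBankSelect.cpp are the mechanical rename of DEBUG(...) to LLVM_DEBUG(...); the macro from llvm/Support/Debug.h was renamed to stop colliding with other projects' DEBUG macros, with identical behavior: a no-op in release builds, and gated on -debug or -debug-only=<DEBUG_TYPE> in builds with assertions. A small usage sketch, not taken from this patch:

    #define DEBUG_TYPE "regbankselect"       // file scope, as in this file

    LLVM_DEBUG(dbgs() << "Assign: " << MI);  // single statement
    LLVM_DEBUG({                             // multi-statement form
      dbgs() << "Operands:";
      for (const MachineOperand &MO : MI.operands())
        dbgs() << ' ' << MO;
      dbgs() << '\n';
    });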
@@ -245,7 +247,7 @@ const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(      MappingCost CurCost =          computeMapping(MI, *CurMapping, LocalRepairPts, &Cost);      if (CurCost < Cost) { -      DEBUG(dbgs() << "New best: " << CurCost << '\n'); +      LLVM_DEBUG(dbgs() << "New best: " << CurCost << '\n');        Cost = CurCost;        BestMapping = CurMapping;        RepairPts.clear(); @@ -397,11 +399,11 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(    MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1);    bool Saturated = Cost.addLocalCost(InstrMapping.getCost());    assert(!Saturated && "Possible mapping saturated the cost"); -  DEBUG(dbgs() << "Evaluating mapping cost for: " << MI); -  DEBUG(dbgs() << "With: " << InstrMapping << '\n'); +  LLVM_DEBUG(dbgs() << "Evaluating mapping cost for: " << MI); +  LLVM_DEBUG(dbgs() << "With: " << InstrMapping << '\n');    RepairPts.clear();    if (BestCost && Cost > *BestCost) { -    DEBUG(dbgs() << "Mapping is too expensive from the start\n"); +    LLVM_DEBUG(dbgs() << "Mapping is too expensive from the start\n");      return Cost;    } @@ -417,17 +419,17 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(      unsigned Reg = MO.getReg();      if (!Reg)        continue; -    DEBUG(dbgs() << "Opd" << OpIdx << '\n'); +    LLVM_DEBUG(dbgs() << "Opd" << OpIdx << '\n');      const RegisterBankInfo::ValueMapping &ValMapping =          InstrMapping.getOperandMapping(OpIdx);      // If Reg is already properly mapped, this is free.      bool Assign;      if (assignmentMatch(Reg, ValMapping, Assign)) { -      DEBUG(dbgs() << "=> is free (match).\n"); +      LLVM_DEBUG(dbgs() << "=> is free (match).\n");        continue;      }      if (Assign) { -      DEBUG(dbgs() << "=> is free (simple assignment).\n"); +      LLVM_DEBUG(dbgs() << "=> is free (simple assignment).\n");        RepairPts.emplace_back(RepairingPlacement(MI, OpIdx, *TRI, *this,                                                  RepairingPlacement::Reassign));        continue; @@ -446,7 +448,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(      // Check that the materialization of the repairing is possible.      if (!RepairPt.canMaterialize()) { -      DEBUG(dbgs() << "Mapping involves impossible repairing\n"); +      LLVM_DEBUG(dbgs() << "Mapping involves impossible repairing\n");        return MappingCost::ImpossibleCost();      } @@ -473,7 +475,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(      // This is an impossible to repair cost.      if (RepairCost == std::numeric_limits<unsigned>::max()) -      continue; +      return MappingCost::ImpossibleCost();      // Bias used for splitting: 5%.      const uint64_t PercentageForBias = 5; @@ -509,7 +511,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(        // Stop looking into what it takes to repair, this is already        // too expensive.        if (BestCost && Cost > *BestCost) { -        DEBUG(dbgs() << "Mapping is too expensive, stop processing\n"); +        LLVM_DEBUG(dbgs() << "Mapping is too expensive, stop processing\n");          return Cost;        } @@ -519,7 +521,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(          break;      }    } -  DEBUG(dbgs() << "Total cost is: " << Cost << "\n"); +  LLVM_DEBUG(dbgs() << "Total cost is: " << Cost << "\n");    return Cost;  } @@ -559,14 +561,14 @@ bool RegBankSelect::applyMapping(    }    // Second, rewrite the instruction. 
-  DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n'); +  LLVM_DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');    RBI->applyMapping(OpdMapper);    return true;  }  bool RegBankSelect::assignInstr(MachineInstr &MI) { -  DEBUG(dbgs() << "Assign: " << MI); +  LLVM_DEBUG(dbgs() << "Assign: " << MI);    // Remember the repairing placement for all the operands.    SmallVector<RepairingPlacement, 4> RepairPts; @@ -587,7 +589,7 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) {    // Make sure the mapping is valid for MI.    assert(BestMapping->verify(MI) && "Invalid instruction mapping"); -  DEBUG(dbgs() << "Best Mapping: " << *BestMapping << '\n'); +  LLVM_DEBUG(dbgs() << "Best Mapping: " << *BestMapping << '\n');    // After this call, MI may not be valid anymore.    // Do not use it. @@ -600,7 +602,7 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {            MachineFunctionProperties::Property::FailedISel))      return false; -  DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');    const Function &F = MF.getFunction();    Mode SaveOptMode = OptMode;    if (F.hasFnAttribute(Attribute::OptimizeNone)) @@ -610,20 +612,13 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {  #ifndef NDEBUG    // Check that our input is fully legal: we require the function to have the    // Legalized property, so it should be. -  // FIXME: This should be in the MachineVerifier, but it can't use the -  // LegalizerInfo as it's currently in the separate GlobalISel library. -  const MachineRegisterInfo &MRI = MF.getRegInfo(); -  if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) { -    for (MachineBasicBlock &MBB : MF) { -      for (MachineInstr &MI : MBB) { -        if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) { -          reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect", -                             "instruction is not legal", MI); -          return false; -        } -      } +  // FIXME: This should be in the MachineVerifier. +  if (!DisableGISelLegalityCheck) +    if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) { +      reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect", +                         "instruction is not legal", *MI); +      return false;      } -  }  #endif    // Walk the function and assign register banks to all operands. 
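Two changes in this file are functional rather than renames: the NDEBUG-only legality check now delegates to machineFunctionIsIllegal() behind the DisableGISelLegalityCheck flag, and getAnalysisUsage() calls the new getSelectionDAGFallbackAnalysisUsage() helper (defined in GlobalISel/Utils.cpp later in this diff) so analyses stay valid if the function falls back to SelectionDAG. A sketch of the expected wiring; MyGISelPass is a made-up name, and the calls mirror RegBankSelect::getAnalysisUsage above:

    void MyGISelPass::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<TargetPassConfig>();
      // Preserve what the SelectionDAG fallback path still needs;
      // currently this marks StackProtector as preserved.
      getSelectionDAGFallbackAnalysisUsage(AU);
      MachineFunctionPass::getAnalysisUsage(AU);
    }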
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp index 4d3ae69d3a9d..16f67a217ce1 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -12,6 +12,7 @@  #include "llvm/CodeGen/GlobalISel/RegisterBank.h"  #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Config/llvm-config.h"  #define DEBUG_TYPE "registerbank" diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index b3d9209ae6eb..dd15567ef1c1 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -22,6 +22,7 @@  #include "llvm/CodeGen/TargetOpcodes.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/Type.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h" @@ -72,7 +73,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {      const RegisterBank &RegBank = getRegBank(Idx);      assert(Idx == RegBank.getID() &&             "ID does not match the index in the array"); -    DEBUG(dbgs() << "Verify " << RegBank << '\n'); +    LLVM_DEBUG(dbgs() << "Verify " << RegBank << '\n');      assert(RegBank.verify(TRI) && "RegBank is invalid");    }  #endif // NDEBUG @@ -403,18 +404,18 @@ RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {  void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {    MachineInstr &MI = OpdMapper.getMI();    MachineRegisterInfo &MRI = OpdMapper.getMRI(); -  DEBUG(dbgs() << "Applying default-like mapping\n"); +  LLVM_DEBUG(dbgs() << "Applying default-like mapping\n");    for (unsigned OpIdx = 0,                  EndIdx = OpdMapper.getInstrMapping().getNumOperands();         OpIdx != EndIdx; ++OpIdx) { -    DEBUG(dbgs() << "OpIdx " << OpIdx); +    LLVM_DEBUG(dbgs() << "OpIdx " << OpIdx);      MachineOperand &MO = MI.getOperand(OpIdx);      if (!MO.isReg()) { -      DEBUG(dbgs() << " is not a register, nothing to be done\n"); +      LLVM_DEBUG(dbgs() << " is not a register, nothing to be done\n");        continue;      }      if (!MO.getReg()) { -      DEBUG(dbgs() << " is %%noreg, nothing to be done\n"); +      LLVM_DEBUG(dbgs() << " is %%noreg, nothing to be done\n");        continue;      }      assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns != @@ -426,14 +427,14 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {      iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =          OpdMapper.getVRegs(OpIdx);      if (NewRegs.begin() == NewRegs.end()) { -      DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); +      LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n");        continue;      }      unsigned OrigReg = MO.getReg();      unsigned NewReg = *NewRegs.begin(); -    DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr)); +    LLVM_DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr));      MO.setReg(NewReg); -    DEBUG(dbgs() << " with " << printReg(NewReg, nullptr)); +    LLVM_DEBUG(dbgs() << " with " << printReg(NewReg, nullptr));      // The OperandsMapper creates plain scalar, we may have to fix that.      // Check if the types match and if not, fix that. 
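The applyDefaultMapping() hunks above follow the standard machine-operand walk idiom. A condensed sketch of that skeleton (hypothetical fragment with the same skip rules, omitting the repairing logic):

    for (unsigned OpIdx = 0, E = MI.getNumOperands(); OpIdx != E; ++OpIdx) {
      MachineOperand &MO = MI.getOperand(OpIdx);
      if (!MO.isReg() || !MO.getReg())
        continue; // immediates, MBB refs, and %noreg carry no register bank
      LLVM_DEBUG(dbgs() << "OpIdx " << OpIdx << " maps "
                        << printReg(MO.getReg(), nullptr) << '\n');
      // ...swap in the repaired vreg with MO.setReg(NewReg)...
    }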
@@ -447,35 +448,27 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {        assert(OrigTy.getSizeInBits() <= NewTy.getSizeInBits() &&               "Types with difference size cannot be handled by the default "               "mapping"); -      DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to " -                   << OrigTy); +      LLVM_DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to " +                        << OrigTy);        MRI.setType(NewReg, OrigTy);      } -    DEBUG(dbgs() << '\n'); +    LLVM_DEBUG(dbgs() << '\n');    }  }  unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,                                           const MachineRegisterInfo &MRI,                                           const TargetRegisterInfo &TRI) const { -  const TargetRegisterClass *RC = nullptr;    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {      // The size is not directly available for physical registers.      // Instead, we need to access a register class that contains Reg and      // get the size of that register class. -    RC = &getMinimalPhysRegClass(Reg, TRI); -  } else { -    LLT Ty = MRI.getType(Reg); -    unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0; -    // If Reg is not a generic register, query the register class to -    // get its size. -    if (RegSize) -      return RegSize; -    // Since Reg is not a generic register, it must have a register class. -    RC = MRI.getRegClass(Reg); +    // Because this is expensive, we'll cache the register class by calling +    auto *RC = &getMinimalPhysRegClass(Reg, TRI); +    assert(RC && "Expecting Register class"); +    return TRI.getRegSizeInBits(*RC);    } -  assert(RC && "Unable to deduce the register class"); -  return TRI.getRegSizeInBits(*RC); +  return TRI.getRegSizeInBits(Reg, MRI);  }  //------------------------------------------------------------------------------ diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp index ef990b49aceb..1a5f88743d5f 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -11,12 +11,14 @@  //===----------------------------------------------------------------------===//  #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/ADT/APFloat.h"  #include "llvm/ADT/Twine.h"  #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineInstrBuilder.h"  #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackProtector.h"  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetPassConfig.h"  #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -42,20 +44,94 @@ unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,    return Reg;  } -  unsigned llvm::constrainOperandRegClass(      const MachineFunction &MF, const TargetRegisterInfo &TRI,      MachineRegisterInfo &MRI, const TargetInstrInfo &TII,      const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, -    unsigned Reg, unsigned OpIdx) { +    const MachineOperand &RegMO, unsigned OpIdx) { +  unsigned Reg = RegMO.getReg();    // Assume physical registers are properly constrained.    
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&           "PhysReg not implemented");    const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF); +  // Some of the target independent instructions, like COPY, may not impose any +  // register class constraints on some of their operands: If it's a use, we can +  // skip constraining as the instruction defining the register would constrain +  // it. + +  // We can't constrain unallocatable register classes, because we can't create +  // virtual registers for these classes, so we need to let targets handled this +  // case. +  if (RegClass && !RegClass->isAllocatable()) +    RegClass = TRI.getConstrainedRegClassForOperand(RegMO, MRI); + +  if (!RegClass) { +    assert((!isTargetSpecificOpcode(II.getOpcode()) || RegMO.isUse()) && +           "Register class constraint is required unless either the " +           "instruction is target independent or the operand is a use"); +    // FIXME: Just bailing out like this here could be not enough, unless we +    // expect the users of this function to do the right thing for PHIs and +    // COPY: +    //   v1 = COPY v0 +    //   v2 = COPY v1 +    // v1 here may end up not being constrained at all. Please notice that to +    // reproduce the issue we likely need a destination pattern of a selection +    // rule producing such extra copies, not just an input GMIR with them as +    // every existing target using selectImpl handles copies before calling it +    // and they never reach this function. +    return Reg; +  }    return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass);  } +bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, +                                            const TargetInstrInfo &TII, +                                            const TargetRegisterInfo &TRI, +                                            const RegisterBankInfo &RBI) { +  assert(!isPreISelGenericOpcode(I.getOpcode()) && +         "A selected instruction is expected"); +  MachineBasicBlock &MBB = *I.getParent(); +  MachineFunction &MF = *MBB.getParent(); +  MachineRegisterInfo &MRI = MF.getRegInfo(); + +  for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) { +    MachineOperand &MO = I.getOperand(OpI); + +    // There's nothing to be done on non-register operands. +    if (!MO.isReg()) +      continue; + +    LLVM_DEBUG(dbgs() << "Converting operand: " << MO << '\n'); +    assert(MO.isReg() && "Unsupported non-reg operand"); + +    unsigned Reg = MO.getReg(); +    // Physical registers don't need to be constrained. +    if (TRI.isPhysicalRegister(Reg)) +      continue; + +    // Register operands with a value of 0 (e.g. predicate operands) don't need +    // to be constrained. +    if (Reg == 0) +      continue; + +    // If the operand is a vreg, we should constrain its regclass, and only +    // insert COPYs if that's impossible. +    // constrainOperandRegClass does that for us. +    MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), +                                       MO, OpI)); + +    // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been +    // done. 
+    if (MO.isUse()) { +      int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO); +      if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx)) +        I.tieOperands(DefIdx, OpI); +    } +  } +  return true; +} +  bool llvm::isTriviallyDead(const MachineInstr &MI,                             const MachineRegisterInfo &MRI) {    // If we can move an instruction, we can remove it.  Otherwise, it has @@ -101,7 +177,7 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,                                      MI.getDebugLoc(), MI.getParent());    R << Msg;    // Printing MI is expensive;  only do it if expensive remarks are enabled. -  if (MORE.allowExtraAnalysis(PassName)) +  if (TPC.isGlobalISelAbortEnabled() || MORE.allowExtraAnalysis(PassName))      R << ": " << ore::MNV("Inst", MI);    reportGISelFailure(MF, TPC, MORE, R);  } @@ -145,3 +221,20 @@ llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg,    }    return DefMI->getOpcode() == Opcode ? DefMI : nullptr;  } + +APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) { +  if (Size == 32) +    return APFloat(float(Val)); +  if (Size == 64) +    return APFloat(Val); +  if (Size != 16) +    llvm_unreachable("Unsupported FPConstant size"); +  bool Ignored; +  APFloat APF(Val); +  APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored); +  return APF; +} + +void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { +  AU.addPreserved<StackProtector>(); +} diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp index 3888226fa059..ca56f4e0c4f1 100644 --- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp @@ -70,7 +70,6 @@  #include "llvm/ADT/Triple.h"  #include "llvm/ADT/Twine.h"  #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DataLayout.h" @@ -89,6 +88,7 @@  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include "llvm/Target/TargetMachine.h"  #include <algorithm>  #include <cassert> @@ -159,13 +159,13 @@ namespace {      bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,                   Module &M, bool isConst, unsigned AddrSpace) const; -    /// \brief Merge everything in \p Globals for which the corresponding bit +    /// Merge everything in \p Globals for which the corresponding bit      /// in \p GlobalSet is set.      
bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,                   const BitVector &GlobalSet, Module &M, bool isConst,                   unsigned AddrSpace) const; -    /// \brief Check if the given variable has been identified as must keep +    /// Check if the given variable has been identified as must keep      /// \pre setMustKeepGlobalVariables must have been called on the Module that      ///      contains GV      bool isMustKeepGlobalVariable(const GlobalVariable *GV) const { @@ -177,7 +177,7 @@ namespace {      void setMustKeepGlobalVariables(Module &M);      /// Collect every variables marked as "used" -    void collectUsedGlobalVariables(Module &M); +    void collectUsedGlobalVariables(Module &M, StringRef Name);      /// Keep track of the GlobalVariable that must not be merged away      SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables; @@ -242,7 +242,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,    // code (currently, a Function) to the set of globals seen so far that are    // used together in that unit (GlobalUsesByFunction).    // -  // When we look at the Nth global, we now that any new set is either: +  // When we look at the Nth global, we know that any new set is either:    // - the singleton set {N}, containing this global only, or    // - the union of {N} and a previously-discovered set, containing some    //   combination of the previous N-1 globals. @@ -440,28 +440,44 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,    assert(Globals.size() > 1);    Type *Int32Ty = Type::getInt32Ty(M.getContext()); +  Type *Int8Ty = Type::getInt8Ty(M.getContext());    auto &DL = M.getDataLayout(); -  DEBUG(dbgs() << " Trying to merge set, starts with #" -               << GlobalSet.find_first() << "\n"); +  LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #" +                    << GlobalSet.find_first() << "\n"); +  bool Changed = false;    ssize_t i = GlobalSet.find_first();    while (i != -1) {      ssize_t j = 0;      uint64_t MergedSize = 0;      std::vector<Type*> Tys;      std::vector<Constant*> Inits; +    std::vector<unsigned> StructIdxs;      bool HasExternal = false;      StringRef FirstExternalName; +    unsigned MaxAlign = 1; +    unsigned CurIdx = 0;      for (j = i; j != -1; j = GlobalSet.find_next(j)) {        Type *Ty = Globals[j]->getValueType(); +      unsigned Align = DL.getPreferredAlignment(Globals[j]); +      unsigned Padding = alignTo(MergedSize, Align) - MergedSize; +      MergedSize += Padding;        MergedSize += DL.getTypeAllocSize(Ty);        if (MergedSize > MaxOffset) {          break;        } +      if (Padding) { +        Tys.push_back(ArrayType::get(Int8Ty, Padding)); +        Inits.push_back(ConstantAggregateZero::get(Tys.back())); +        ++CurIdx; +      }        Tys.push_back(Ty);        Inits.push_back(Globals[j]->getInitializer()); +      StructIdxs.push_back(CurIdx++); + +      MaxAlign = std::max(MaxAlign, Align);        if (Globals[j]->hasExternalLinkage() && !HasExternal) {          HasExternal = true; @@ -469,12 +485,19 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,        }      } +    // Exit early if there is only one global to merge. +    if (Tys.size() < 2) { +      i = j; +      continue; +    } +      // If merged variables doesn't have external linkage, we needn't to expose      // the symbol after merging.      GlobalValue::LinkageTypes Linkage = HasExternal                                              ? 
GlobalValue::ExternalLinkage                                              : GlobalValue::InternalLinkage; -    StructType *MergedTy = StructType::get(M.getContext(), Tys); +    // Use a packed struct so we can control alignment. +    StructType *MergedTy = StructType::get(M.getContext(), Tys, true);      Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);      // On Darwin external linkage needs to be preserved, otherwise @@ -492,19 +515,23 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,          M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,          GlobalVariable::NotThreadLocal, AddrSpace); -    const StructLayout *MergedLayout = DL.getStructLayout(MergedTy); +    MergedGV->setAlignment(MaxAlign); +    const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);      for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {        GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();        std::string Name = Globals[k]->getName(); +      GlobalValue::DLLStorageClassTypes DLLStorage = +          Globals[k]->getDLLStorageClass();        // Copy metadata while adjusting any debug info metadata by the original        // global's offset within the merged global. -      MergedGV->copyMetadata(Globals[k], MergedLayout->getElementOffset(idx)); +      MergedGV->copyMetadata(Globals[k], +                             MergedLayout->getElementOffset(StructIdxs[idx]));        Constant *Idx[2] = { -        ConstantInt::get(Int32Ty, 0), -        ConstantInt::get(Int32Ty, idx), +          ConstantInt::get(Int32Ty, 0), +          ConstantInt::get(Int32Ty, StructIdxs[idx]),        };        Constant *GEP =            ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx); @@ -517,20 +544,23 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,        // It's not safe on Mach-O as the alias (and thus the portion of the        // MergedGlobals variable) may be dead stripped at link time.        if (Linkage != GlobalValue::InternalLinkage || !IsMachO) { -        GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M); +        GlobalAlias *GA = GlobalAlias::create(Tys[StructIdxs[idx]], AddrSpace, +                                              Linkage, Name, GEP, &M); +        GA->setDLLStorageClass(DLLStorage);        }        NumMerged++;      } +    Changed = true;      i = j;    } -  return true; +  return Changed;  } -void GlobalMerge::collectUsedGlobalVariables(Module &M) { +void GlobalMerge::collectUsedGlobalVariables(Module &M, StringRef Name) {    // Extract global variables from llvm.used array -  const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); +  const GlobalVariable *GV = M.getGlobalVariable(Name);    if (!GV || !GV->hasInitializer()) return;    // Should be an array of 'i8*'. 
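collectUsedGlobalVariables() is parameterized by name above so that both llvm.used and llvm.compiler.used now pin globals against merging (see the setMustKeepGlobalVariables hunk below). For orientation, a sketch of what such a walker typically looks like; this is a hypothetical helper, not the patch's exact body:

    static void forEachUsedGlobal(Module &M, StringRef Name,
                                  function_ref<void(const GlobalValue &)> F) {
      const GlobalVariable *GV = M.getGlobalVariable(Name);
      if (!GV || !GV->hasInitializer())
        return;
      // The initializer is an array of i8* casts of the pinned globals.
      if (const auto *Init = dyn_cast<ConstantArray>(GV->getInitializer()))
        for (const Value *Op : Init->operand_values())
          F(*cast<GlobalValue>(Op->stripPointerCasts()));
    }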
@@ -543,7 +573,8 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) {  }  void GlobalMerge::setMustKeepGlobalVariables(Module &M) { -  collectUsedGlobalVariables(M); +  collectUsedGlobalVariables(M, "llvm.used"); +  collectUsedGlobalVariables(M, "llvm.compiler.used");    for (Function &F : M) {      for (BasicBlock &BB : F) { @@ -577,8 +608,7 @@ bool GlobalMerge::doInitialization(Module &M) {    for (auto &GV : M.globals()) {      // Merge is safe for "normal" internal or external globals only      if (GV.isDeclaration() || GV.isThreadLocal() || -        GV.hasSection() || GV.hasImplicitSection() || -        GV.hasDLLExportStorageClass()) +        GV.hasSection() || GV.hasImplicitSection())        continue;      // It's not safe to merge globals that may be preempted @@ -594,12 +624,6 @@ bool GlobalMerge::doInitialization(Module &M) {      unsigned AddressSpace = PT->getAddressSpace(); -    // Ignore fancy-aligned globals for now. -    unsigned Alignment = DL.getPreferredAlignment(&GV); -    Type *Ty = GV.getValueType(); -    if (Alignment > DL.getABITypeAlignment(Ty)) -      continue; -      // Ignore all 'special' globals.      if (GV.getName().startswith("llvm.") ||          GV.getName().startswith(".llvm.")) @@ -609,6 +633,7 @@ bool GlobalMerge::doInitialization(Module &M) {      if (isMustKeepGlobalVariable(&GV))        continue; +    Type *Ty = GV.getValueType();      if (DL.getTypeAllocSize(Ty) < MaxOffset) {        if (TM &&            TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal()) diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index d8ce90e63a9d..f12d00071b24 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -252,7 +252,7 @@ namespace {          BBInfo &TrueBBI, BBInfo &FalseBBI) const;      void AnalyzeBlock(MachineBasicBlock &MBB,                        std::vector<std::unique_ptr<IfcvtToken>> &Tokens); -    bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, +    bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Pred,                               bool isTriangle = false, bool RevBranch = false,                               bool hasCommonTail = false);      void AnalyzeBlocks(MachineFunction &MF, @@ -347,7 +347,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {    BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());    MBPI = &getAnalysis<MachineBranchProbabilityInfo>();    MRI = &MF.getRegInfo(); -  SchedModel.init(ST.getSchedModel(), &ST, TII); +  SchedModel.init(&ST);    if (!TII) return false; @@ -361,14 +361,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {                                     getAnalysisIfAvailable<MachineModuleInfo>());    } -  DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum <<  ") \'" -               << MF.getName() << "\'"); +  LLVM_DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" +                    << MF.getName() << "\'");    if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { -    DEBUG(dbgs() << " skipped\n"); +    LLVM_DEBUG(dbgs() << " skipped\n");      return false;    } -  DEBUG(dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "\n");    MF.RenumberBlocks();    BBAnalysis.resize(MF.getNumBlockIDs()); @@ -406,14 +406,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {        case ICSimpleFalse: {          bool isFalse = Kind == ICSimpleFalse;          if ((isFalse && 
DisableSimpleF) || (!isFalse && DisableSimple)) break; -        DEBUG(dbgs() << "Ifcvt (Simple" -                     << (Kind == ICSimpleFalse ? " false" : "") -                     << "): " << printMBBReference(*BBI.BB) << " (" -                     << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber() -                                                 : BBI.TrueBB->getNumber()) -                     << ") "); +        LLVM_DEBUG(dbgs() << "Ifcvt (Simple" +                          << (Kind == ICSimpleFalse ? " false" : "") +                          << "): " << printMBBReference(*BBI.BB) << " (" +                          << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber() +                                                      : BBI.TrueBB->getNumber()) +                          << ") ");          RetVal = IfConvertSimple(BBI, Kind); -        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); +        LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");          if (RetVal) {            if (isFalse) ++NumSimpleFalse;            else         ++NumSimple; @@ -430,16 +430,16 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {          if (DisableTriangleR && !isFalse && isRev) break;          if (DisableTriangleF && isFalse && !isRev) break;          if (DisableTriangleFR && isFalse && isRev) break; -        DEBUG(dbgs() << "Ifcvt (Triangle"); +        LLVM_DEBUG(dbgs() << "Ifcvt (Triangle");          if (isFalse) -          DEBUG(dbgs() << " false"); +          LLVM_DEBUG(dbgs() << " false");          if (isRev) -          DEBUG(dbgs() << " rev"); -        DEBUG(dbgs() << "): " << printMBBReference(*BBI.BB) -                     << " (T:" << BBI.TrueBB->getNumber() -                     << ",F:" << BBI.FalseBB->getNumber() << ") "); +          LLVM_DEBUG(dbgs() << " rev"); +        LLVM_DEBUG(dbgs() << "): " << printMBBReference(*BBI.BB) +                          << " (T:" << BBI.TrueBB->getNumber() +                          << ",F:" << BBI.FalseBB->getNumber() << ") ");          RetVal = IfConvertTriangle(BBI, Kind); -        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); +        LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");          if (RetVal) {            if (isFalse) {              if (isRev) ++NumTriangleFRev; @@ -453,24 +453,25 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {        }        case ICDiamond:          if (DisableDiamond) break; -        DEBUG(dbgs() << "Ifcvt (Diamond): " << printMBBReference(*BBI.BB) -                     << " (T:" << BBI.TrueBB->getNumber() -                     << ",F:" << BBI.FalseBB->getNumber() << ") "); +        LLVM_DEBUG(dbgs() << "Ifcvt (Diamond): " << printMBBReference(*BBI.BB) +                          << " (T:" << BBI.TrueBB->getNumber() +                          << ",F:" << BBI.FalseBB->getNumber() << ") ");          RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2,                                    Token->TClobbersPred,                                    Token->FClobbersPred); -        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); +        LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" 
: "failed!") << "\n");          if (RetVal) ++NumDiamonds;          break;        case ICForkedDiamond:          if (DisableForkedDiamond) break; -        DEBUG(dbgs() << "Ifcvt (Forked Diamond): " << printMBBReference(*BBI.BB) -                     << " (T:" << BBI.TrueBB->getNumber() -                     << ",F:" << BBI.FalseBB->getNumber() << ") "); +        LLVM_DEBUG(dbgs() << "Ifcvt (Forked Diamond): " +                          << printMBBReference(*BBI.BB) +                          << " (T:" << BBI.TrueBB->getNumber() +                          << ",F:" << BBI.FalseBB->getNumber() << ") ");          RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2,                                        Token->TClobbersPred,                                        Token->FClobbersPred); -        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); +        LLVM_DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");          if (RetVal) ++NumForkedDiamonds;          break;        } @@ -948,7 +949,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI,    BBI.ExtraCost2 = 0;    BBI.ClobbersPred = false;    for (MachineInstr &MI : make_range(Begin, End)) { -    if (MI.isDebugValue()) +    if (MI.isDebugInstr())        continue;      // It's unsafe to duplicate convergent instructions in this context, so set @@ -1726,14 +1727,14 @@ bool IfConverter::IfConvertDiamondCommon(    for (unsigned i = 0; i < NumDups1; ++DI1) {      if (DI1 == MBB1.end())        break; -    if (!DI1->isDebugValue()) +    if (!DI1->isDebugInstr())        ++i;    }    while (NumDups1 != 0) {      ++DI2;      if (DI2 == MBB2.end())        break; -    if (!DI2->isDebugValue()) +    if (!DI2->isDebugInstr())        --NumDups1;    } @@ -1767,7 +1768,7 @@ bool IfConverter::IfConvertDiamondCommon(      assert(DI1 != MBB1.begin());      --DI1;      // skip dbg_value instructions -    if (!DI1->isDebugValue()) +    if (!DI1->isDebugInstr())        ++i;    }    MBB1.erase(DI1, MBB1.end()); @@ -1782,7 +1783,7 @@ bool IfConverter::IfConvertDiamondCommon(      // instructions could be found.      
while (DI2 != MBB2.begin()) {        MachineBasicBlock::iterator Prev = std::prev(DI2); -      if (!Prev->isBranch() && !Prev->isDebugValue()) +      if (!Prev->isBranch() && !Prev->isDebugInstr())          break;        DI2 = Prev;      } @@ -1793,7 +1794,7 @@ bool IfConverter::IfConvertDiamondCommon(      assert(DI2 != MBB2.begin());      --DI2;      // skip dbg_value instructions -    if (!DI2->isDebugValue()) +    if (!DI2->isDebugInstr())        --NumDups2;    } @@ -1809,7 +1810,7 @@ bool IfConverter::IfConvertDiamondCommon(    SmallSet<unsigned, 4> ExtUses;    if (TII->isProfitableToUnpredicate(MBB1, MBB2)) {      for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) { -      if (FI.isDebugValue()) +      if (FI.isDebugInstr())          continue;        SmallVector<unsigned, 4> Defs;        for (const MachineOperand &MO : FI.operands()) { @@ -2002,7 +2003,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI,    bool AnyUnpred = false;    bool MaySpec = LaterRedefs != nullptr;    for (MachineInstr &I : make_range(BBI.BB->begin(), E)) { -    if (I.isDebugValue() || TII->isPredicated(I)) +    if (I.isDebugInstr() || TII->isPredicated(I))        continue;      // It may be possible not to predicate an instruction if it's the 'true'      // side of a diamond and the 'false' side may re-define the instruction's @@ -2058,7 +2059,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,        ToBBI.ExtraCost += NumCycles-1;      ToBBI.ExtraCost2 += ExtraPredCost; -    if (!TII->isPredicated(I) && !MI->isDebugValue()) { +    if (!TII->isPredicated(I) && !MI->isDebugInstr()) {        if (!TII->PredicateInstruction(*MI, Cond)) {  #ifndef NDEBUG          dbgs() << "Unable to predicate " << I << "!\n"; diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 308b6d293d3d..0a447bc613b1 100644 --- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -115,7 +115,7 @@ class ImplicitNullChecks : public MachineFunctionPass {    /// \c canHandle should return true for all instructions in \p    /// Insts.    DependenceResult computeDependence(const MachineInstr *MI, -                                     ArrayRef<MachineInstr *> Insts); +                                     ArrayRef<MachineInstr *> Block);    /// Represents one null check that can be made implicit.    class NullCheck { @@ -134,7 +134,7 @@ class ImplicitNullChecks : public MachineFunctionPass {      // The block branched to if the pointer is null.      MachineBasicBlock *NullSucc; -    // If this is non-null, then MemOperation has a dependency on on this +    // If this is non-null, then MemOperation has a dependency on this      // instruction; and it needs to be hoisted to execute before MemOperation.      MachineInstr *OnlyDependency; @@ -198,7 +198,7 @@ class ImplicitNullChecks : public MachineFunctionPass {    SuitabilityResult isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,                                         ArrayRef<MachineInstr *> PrevInsts); -  /// Return true if \p FaultingMI can be hoisted from after the the +  /// Return true if \p FaultingMI can be hoisted from after the    /// instructions in \p InstsSeenSoFar to before them.  Set \p Dependence to a    /// non-null value if we also need to (and legally can) hoist a depedency.    
bool canHoistInst(MachineInstr *FaultingMI, unsigned PointerReg, @@ -496,6 +496,32 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(    if (NotNullSucc->pred_size() != 1)      return false; +  // To prevent the invalid transformation of the following code: +  // +  //   mov %rax, %rcx +  //   test %rax, %rax +  //   %rax = ... +  //   je throw_npe +  //   mov(%rcx), %r9 +  //   mov(%rax), %r10 +  // +  // into: +  // +  //   mov %rax, %rcx +  //   %rax = .... +  //   faulting_load_op("movl (%rax), %r10", throw_npe) +  //   mov(%rcx), %r9 +  // +  // we must ensure that there are no instructions between the 'test' and +  // conditional jump that modify %rax. +  const unsigned PointerReg = MBP.LHS.getReg(); + +  assert(MBP.ConditionDef->getParent() ==  &MBB && "Should be in basic block"); + +  for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I) +    if (I->modifiesRegister(PointerReg, TRI)) +      return false; +    // Starting with a code fragment like:    //    //   test %rax, %rax @@ -550,8 +576,6 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(    // ptr could be some non-null invalid reference that never gets loaded from    // because some_cond is always true. -  const unsigned PointerReg = MBP.LHS.getReg(); -    SmallVector<MachineInstr *, 8> InstsSeenSoFar;    for (auto &MI : *NotNullSucc) { @@ -596,9 +620,8 @@ MachineInstr *ImplicitNullChecks::insertFaultingInstr(    unsigned DefReg = NoRegister;    if (NumDefs != 0) { -    DefReg = MI->defs().begin()->getReg(); -    assert(std::distance(MI->defs().begin(), MI->defs().end()) == 1 && -           "expected exactly one def!"); +    DefReg = MI->getOperand(0).getReg(); +    assert(NumDefs == 1 && "expected exactly one def!");    }    FaultMaps::FaultKind FK; diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 86ce4b7a9464..007e9283d833 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -46,6 +46,7 @@  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/Support/BlockFrequency.h"  #include "llvm/Support/BranchProbability.h"  #include "llvm/Support/CommandLine.h" @@ -335,7 +336,7 @@ void InlineSpiller::collectRegsToSpill() {      if (isRegToSpill(SnipReg))        continue;      RegsToSpill.push_back(SnipReg); -    DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n'); +    LLVM_DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n');      ++NumSnippets;    }  } @@ -387,8 +388,8 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,    LiveInterval &OrigLI = LIS.getInterval(Original);    VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);    StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0)); -  DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " -               << *StackInt << '\n'); +  LLVM_DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " +                    << *StackInt << '\n');    // We are going to spill SrcVNI immediately after its def, so clear out    // any later spills of the same value. @@ -409,7 +410,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,                            MRI.getRegClass(SrcReg), &TRI);    --MII; // Point to store instruction.    
LIS.InsertMachineInstrInMaps(*MII); -  DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII); +  LLVM_DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);    HSpiller.addToMergeableSpills(*MII, StackSlot, Original);    ++NumSpills; @@ -428,8 +429,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {      LiveInterval *LI;      std::tie(LI, VNI) = WorkList.pop_back_val();      unsigned Reg = LI->reg; -    DEBUG(dbgs() << "Checking redundant spills for " -                 << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); +    LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@' +                      << VNI->def << " in " << *LI << '\n');      // Regs to spill are taken care of.      if (isRegToSpill(Reg)) @@ -437,7 +438,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {      // Add all of VNI's live range to StackInt.      StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0)); -    DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); +    LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');      // Find all spills and copies of VNI.      for (MachineRegisterInfo::use_instr_nodbg_iterator @@ -465,7 +466,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {        // Erase spills.        int FI;        if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) { -        DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI); +        LLVM_DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI);          // eliminateDeadDefs won't normally remove stores, so switch opcode.          MI.setDesc(TII.get(TargetOpcode::KILL));          DeadDefs.push_back(&MI); @@ -527,13 +528,13 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {    VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());    if (!ParentVNI) { -    DEBUG(dbgs() << "\tadding <undef> flags: "); +    LLVM_DEBUG(dbgs() << "\tadding <undef> flags: ");      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {        MachineOperand &MO = MI.getOperand(i);        if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)          MO.setIsUndef();      } -    DEBUG(dbgs() << UseIdx << '\t' << MI); +    LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI);      return true;    } @@ -547,7 +548,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {    if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) {      markValueUsed(&VirtReg, ParentVNI); -    DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI); +    LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);      return false;    } @@ -555,7 +556,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {    // same register for uses and defs.    
if (RI.Tied) {      markValueUsed(&VirtReg, ParentVNI); -    DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI); +    LLVM_DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI);      return false;    } @@ -581,8 +582,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {    NewMI->setDebugLoc(MI.getDebugLoc());    (void)DefIdx; -  DEBUG(dbgs() << "\tremat:  " << DefIdx << '\t' -               << *LIS.getInstructionFromIndex(DefIdx)); +  LLVM_DEBUG(dbgs() << "\tremat:  " << DefIdx << '\t' +                    << *LIS.getInstructionFromIndex(DefIdx));    // Replace operands    for (const auto &OpPair : Ops) { @@ -592,7 +593,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {        MO.setIsKill();      }    } -  DEBUG(dbgs() << "\t        " << UseIdx << '\t' << MI << '\n'); +  LLVM_DEBUG(dbgs() << "\t        " << UseIdx << '\t' << MI << '\n');    ++NumRemats;    return true; @@ -619,6 +620,9 @@ void InlineSpiller::reMaterializeAll() {        if (MI.isDebugValue())          continue; +      assert(!MI.isDebugInstr() && "Did not expect to find a use in debug " +             "instruction that isn't a DBG_VALUE"); +        anyRemat |= reMaterializeFor(LI, MI);      }    } @@ -637,7 +641,7 @@ void InlineSpiller::reMaterializeAll() {        MI->addRegisterDead(Reg, &TRI);        if (!MI->allDefsAreDead())          continue; -      DEBUG(dbgs() << "All defs dead: " << *MI); +      LLVM_DEBUG(dbgs() << "All defs dead: " << *MI);        DeadDefs.push_back(MI);      }    } @@ -646,7 +650,7 @@ void InlineSpiller::reMaterializeAll() {    // deleted here.    if (DeadDefs.empty())      return; -  DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); +  LLVM_DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");    Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);    // LiveRangeEdit::eliminateDeadDef is used to remove dead define instructions @@ -669,7 +673,8 @@ void InlineSpiller::reMaterializeAll() {      RegsToSpill[ResultPos++] = Reg;    }    RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end()); -  DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); +  LLVM_DEBUG(dbgs() << RegsToSpill.size() +                    << " registers to spill after remat.\n");  }  //===----------------------------------------------------------------------===// @@ -691,7 +696,7 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {    if (!IsLoad)      HSpiller.rmFromMergeableSpills(*MI, StackSlot); -  DEBUG(dbgs() << "Coalescing stack access: " << *MI); +  LLVM_DEBUG(dbgs() << "Coalescing stack access: " << *MI);    LIS.RemoveMachineInstrFromMaps(*MI);    MI->eraseFromParent(); @@ -848,8 +853,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,          FoldMI->RemoveOperand(i - 1);      } -  DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, -                                           "folded")); +  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, +                                                "folded"));    if (!WasCopy)      ++NumFolded; @@ -872,8 +877,8 @@ void InlineSpiller::insertReload(unsigned NewVReg,    LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI); -  DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload", -                                           NewVReg)); +  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, 
"reload", +                                                NewVReg));    ++NumReloads;  } @@ -912,8 +917,8 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,    LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end()); -  DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS, -                                           "spill")); +  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS, +                                                "spill"));    ++NumSpills;    if (IsRealSpill)      HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original); @@ -921,7 +926,7 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,  /// spillAroundUses - insert spill code around each use of Reg.  void InlineSpiller::spillAroundUses(unsigned Reg) { -  DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n'); +  LLVM_DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n');    LiveInterval &OldLI = LIS.getInterval(Reg);    // Iterate over instructions using Reg. @@ -934,12 +939,15 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {      if (MI->isDebugValue()) {        // Modify DBG_VALUE now that the value is in a spill slot.        MachineBasicBlock *MBB = MI->getParent(); -      DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI); +      LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI);        buildDbgValueForSpill(*MBB, MI, *MI, StackSlot);        MBB->erase(MI);        continue;      } +    assert(!MI->isDebugInstr() && "Did not expect to find a use in debug " +           "instruction that isn't a DBG_VALUE"); +      // Ignore copies to/from snippets. We'll delete them.      if (SnippetCopies.count(MI))        continue; @@ -965,7 +973,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {      if (SibReg && isSibling(SibReg)) {        // This may actually be a copy between snippets.        if (isRegToSpill(SibReg)) { -        DEBUG(dbgs() << "Found new snippet copy: " << *MI); +        LLVM_DEBUG(dbgs() << "Found new snippet copy: " << *MI);          SnippetCopies.insert(MI);          continue;        } @@ -1008,7 +1016,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {            hasLiveDef = true;        }      } -    DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n'); +    LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n');      // FIXME: Use a second vreg if instruction has no tied ops.      if (RI.Writes) @@ -1034,7 +1042,7 @@ void InlineSpiller::spillAll() {    for (unsigned Reg : RegsToSpill)      StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg),                                       StackInt->getValNumInfo(0)); -  DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); +  LLVM_DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');    // Spill around uses of all RegsToSpill.    for (unsigned Reg : RegsToSpill) @@ -1042,7 +1050,7 @@ void InlineSpiller::spillAll() {    // Hoisted spills may cause dead code.    
if (!DeadDefs.empty()) { -    DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); +    LLVM_DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");      Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);    } @@ -1074,10 +1082,10 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {    StackSlot = VRM.getStackSlot(Original);    StackInt = nullptr; -  DEBUG(dbgs() << "Inline spilling " -               << TRI.getRegClassName(MRI.getRegClass(edit.getReg())) -               << ':' << edit.getParent() -               << "\nFrom original " << printReg(Original) << '\n'); +  LLVM_DEBUG(dbgs() << "Inline spilling " +                    << TRI.getRegClassName(MRI.getRegClass(edit.getReg())) +                    << ':' << edit.getParent() << "\nFrom original " +                    << printReg(Original) << '\n');    assert(edit.getParent().isSpillable() &&           "Attempting to spill already spilled value.");    assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); @@ -1261,11 +1269,11 @@ void HoistSpillHelper::getVisitOrders(           "Orders have different size with WorkSet");  #ifndef NDEBUG -  DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n"); +  LLVM_DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");    SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();    for (; RIt != Orders.rend(); RIt++) -    DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ","); -  DEBUG(dbgs() << "\n"); +    LLVM_DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ","); +  LLVM_DEBUG(dbgs() << "\n");  #endif  } @@ -1374,7 +1382,7 @@ void HoistSpillHelper::runHoistSpills(        // Current Block is the BB containing the new hoisted spill. Add it to        // SpillsToKeep. LiveReg is the source of the new spill.        
SpillsToKeep[*RIt] = LiveReg; -      DEBUG({ +      LLVM_DEBUG({          dbgs() << "spills in BB: ";          for (const auto Rspill : SpillsInSubTree)            dbgs() << Rspill->getBlock()->getNumber() << " "; @@ -1430,7 +1438,7 @@ void HoistSpillHelper::hoistAllSpills() {      if (Ent.second.empty())        continue; -    DEBUG({ +    LLVM_DEBUG({        dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"               << "Equal spills in BB: ";        for (const auto spill : EqValSpills) @@ -1445,7 +1453,7 @@ void HoistSpillHelper::hoistAllSpills() {      runHoistSpills(OrigLI, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns); -    DEBUG({ +    LLVM_DEBUG({        dbgs() << "Finally inserted spills in BB: ";        for (const auto Ispill : SpillsToIns)          dbgs() << Ispill.first->getNumber() << " "; diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp index 72227cc7bba9..82f6e8d8e234 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp @@ -48,8 +48,8 @@ void InterferenceCache::reinitPhysRegEntries() {    if (PhysRegEntriesCount == TRI->getNumRegs()) return;    free(PhysRegEntries);    PhysRegEntriesCount = TRI->getNumRegs(); -  PhysRegEntries = (unsigned char*) -    calloc(PhysRegEntriesCount, sizeof(unsigned char)); +  PhysRegEntries = static_cast<unsigned char*>( +      safe_calloc(PhysRegEntriesCount, sizeof(unsigned char)));  }  void InterferenceCache::init(MachineFunction *mf, diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 9c906d309639..fd2ff162630a 100644 --- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -104,15 +104,15 @@ private:    /// The maximum supported interleave factor.    unsigned MaxFactor; -  /// \brief Transform an interleaved load into target specific intrinsics. +  /// Transform an interleaved load into target specific intrinsics.    bool lowerInterleavedLoad(LoadInst *LI,                              SmallVector<Instruction *, 32> &DeadInsts); -  /// \brief Transform an interleaved store into target specific intrinsics. +  /// Transform an interleaved store into target specific intrinsics.    bool lowerInterleavedStore(StoreInst *SI,                               SmallVector<Instruction *, 32> &DeadInsts); -  /// \brief Returns true if the uses of an interleaved load by the +  /// Returns true if the uses of an interleaved load by the    /// extractelement instructions in \p Extracts can be replaced by uses of the    /// shufflevector instructions in \p Shuffles instead. If so, the necessary    /// replacements are also performed. @@ -136,7 +136,7 @@ FunctionPass *llvm::createInterleavedAccessPass() {    return new InterleavedAccess();  } -/// \brief Check if the mask is a DE-interleave mask of the given factor +/// Check if the mask is a DE-interleave mask of the given factor  /// \p Factor like:  ///     <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>  static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor, @@ -158,7 +158,7 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,    return false;  } -/// \brief Check if the mask is a DE-interleave mask for an interleaved load. +/// Check if the mask is a DE-interleave mask for an interleaved load.  ///  /// E.g. 
DE-interleave masks (Factor = 2) could be:  ///     <0, 2, 4, 6>    (mask of index 0 to extract even elements) @@ -176,7 +176,7 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,    return false;  } -/// \brief Check if the mask can be used in an interleaved store. +/// Check if the mask can be used in an interleaved store.  //  /// It checks for a more general pattern than the RE-interleave mask.  /// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...> @@ -332,7 +332,7 @@ bool InterleavedAccess::lowerInterleavedLoad(    if (!tryReplaceExtracts(Extracts, Shuffles))      return false; -  DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n"); +  LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");    // Try to create target specific intrinsics to replace the load and shuffles.    if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) @@ -424,7 +424,7 @@ bool InterleavedAccess::lowerInterleavedStore(    if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))      return false; -  DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n"); +  LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");    // Try to create target specific intrinsics to replace the store and shuffle.    if (!TLI->lowerInterleavedStore(SI, SVI, Factor)) @@ -441,7 +441,7 @@ bool InterleavedAccess::runOnFunction(Function &F) {    if (!TPC || !LowerInterleavedAccesses)      return false; -  DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n"); +  LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();    auto &TM = TPC->getTM<TargetMachine>(); diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index 12777d5ed110..eb4099964242 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -456,6 +456,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {    }    case Intrinsic::dbg_declare: +  case Intrinsic::dbg_label:      break;    // Simply strip out debugging intrinsics    case Intrinsic::eh_typeid_for: diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 4c6e21ab315a..2cd389ce2c11 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -16,7 +16,6 @@  #include "llvm/CodeGen/BasicTTIImpl.h"  #include "llvm/CodeGen/MachineModuleInfo.h"  #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/CodeGen/TargetPassConfig.h"  #include "llvm/IR/LegacyPassManager.h"  #include "llvm/MC/MCAsmBackend.h" @@ -24,16 +23,22 @@  #include "llvm/MC/MCCodeEmitter.h"  #include "llvm/MC/MCContext.h"  #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectWriter.h"  #include "llvm/MC/MCStreamer.h"  #include "llvm/MC/MCSubtargetInfo.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/FormattedStream.h"  #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include "llvm/Target/TargetMachine.h"  #include "llvm/Target/TargetOptions.h"  using namespace llvm; +static cl::opt<bool> EnableTrapUnreachable("trap-unreachable", +  cl::Hidden, cl::ZeroOrMore, cl::init(false), +  cl::desc("Enable generating trap for unreachable")); +  void 
LLVMTargetMachine::initAsmInfo() {    MRI = TheTarget.createMCRegInfo(getTargetTriple().str());    MII = TheTarget.createMCInstrInfo(); @@ -79,6 +84,9 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T,    this->RM = RM;    this->CMModel = CM;    this->OptLevel = OL; + +  if (EnableTrapUnreachable) +    this->Options.TrapUnreachable = true;  }  TargetTransformInfo @@ -113,8 +121,10 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,  }  bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, -    raw_pwrite_stream &Out, CodeGenFileType FileType, -    MCContext &Context) { +                                      raw_pwrite_stream &Out, +                                      raw_pwrite_stream *DwoOut, +                                      CodeGenFileType FileType, +                                      MCContext &Context) {    if (Options.MCOptions.MCSaveTempLabels)      Context.setAllowTemporaryLabels(false); @@ -131,17 +141,17 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,          getTargetTriple(), MAI.getAssemblerDialect(), MAI, MII, MRI);      // Create a code emitter if asked to show the encoding. -    MCCodeEmitter *MCE = nullptr; +    std::unique_ptr<MCCodeEmitter> MCE;      if (Options.MCOptions.ShowMCEncoding) -      MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); +      MCE.reset(getTarget().createMCCodeEmitter(MII, MRI, Context)); -    MCAsmBackend *MAB = -        getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions); +    std::unique_ptr<MCAsmBackend> MAB( +        getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));      auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);      MCStreamer *S = getTarget().createAsmStreamer(          Context, std::move(FOut), Options.MCOptions.AsmVerbose, -        Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, -        Options.MCOptions.ShowMCInst); +        Options.MCOptions.MCUseDwarfDirectory, InstPrinter, std::move(MCE), +        std::move(MAB), Options.MCOptions.ShowMCInst);      AsmStreamer.reset(S);      break;    } @@ -159,7 +169,9 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,      Triple T(getTargetTriple().str());      AsmStreamer.reset(getTarget().createMCObjectStreamer( -        T, Context, std::unique_ptr<MCAsmBackend>(MAB), Out, +        T, Context, std::unique_ptr<MCAsmBackend>(MAB), +        DwoOut ? 
MAB->createDwoObjectWriter(Out, *DwoOut) +               : MAB->createObjectWriter(Out),          std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,          Options.MCOptions.MCIncrementalLinkerCompatible,          /*DWARFMustBeAtTheEnd*/ true)); @@ -184,6 +196,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,  bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,                                              raw_pwrite_stream &Out, +                                            raw_pwrite_stream *DwoOut,                                              CodeGenFileType FileType,                                              bool DisableVerify,                                              MachineModuleInfo *MMI) { @@ -194,7 +207,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,    if (!Context)      return true; -  if (WillCompleteCodeGenPipeline && addAsmPrinter(PM, Out, FileType, *Context)) +  if (WillCompleteCodeGenPipeline && +      addAsmPrinter(PM, Out, DwoOut, FileType, *Context))      return true;    PM.add(createFreeMachineFunctionPass()); @@ -234,7 +248,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,    const Triple &T = getTargetTriple();    std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer( -      T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), Out, +      T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(Out),        std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,        Options.MCOptions.MCIncrementalLinkerCompatible,        /*DWARFMustBeAtTheEnd*/ true)); diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp index 8ffd51a550fc..5dbce841cfd5 100644 --- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -14,6 +14,7 @@  //===----------------------------------------------------------------------===//  #include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h"  using namespace llvm; @@ -139,3 +140,14 @@ void LatencyPriorityQueue::remove(SUnit *SU) {      std::swap(*I, Queue.back());    Queue.pop_back();  } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const { +  dbgs() << "Latency Priority Queue\n"; +  dbgs() << "  Number of Queue Entries: " << Queue.size() << "\n"; +  for (auto const &SU : Queue) { +    dbgs() << "    "; +    SU->dump(DAG); +  } +} +#endif diff --git a/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp index 996d40ca6e1e..5b52cc66a297 100644 --- a/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp @@ -57,23 +57,23 @@ MachineBlockFrequencyInfo &  LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {    auto *MBFI = getAnalysisIfAvailable<MachineBlockFrequencyInfo>();    if (MBFI) { -    DEBUG(dbgs() << "MachineBlockFrequencyInfo is available\n"); +    LLVM_DEBUG(dbgs() << "MachineBlockFrequencyInfo is available\n");      return *MBFI;    }    auto &MBPI = getAnalysis<MachineBranchProbabilityInfo>();    auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();    auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); -  DEBUG(dbgs() << "Building 
MachineBlockFrequencyInfo on the fly\n"); -  DEBUG(if (MLI) dbgs() << "LoopInfo is available\n"); +  LLVM_DEBUG(dbgs() << "Building MachineBlockFrequencyInfo on the fly\n"); +  LLVM_DEBUG(if (MLI) dbgs() << "LoopInfo is available\n");    if (!MLI) { -    DEBUG(dbgs() << "Building LoopInfo on the fly\n"); +    LLVM_DEBUG(dbgs() << "Building LoopInfo on the fly\n");      // First create a dominator tree. -    DEBUG(if (MDT) dbgs() << "DominatorTree is available\n"); +    LLVM_DEBUG(if (MDT) dbgs() << "DominatorTree is available\n");      if (!MDT) { -      DEBUG(dbgs() << "Building DominatorTree on the fly\n"); +      LLVM_DEBUG(dbgs() << "Building DominatorTree on the fly\n");        OwnedMDT = make_unique<MachineDominatorTree>();        OwnedMDT->getBase().recalculate(*MF);        MDT = OwnedMDT.get(); diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index 8c54751ee833..d06821bdfcce 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -20,6 +20,7 @@  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/DebugInfoMetadata.h"  #include "llvm/IR/Metadata.h"  #include "llvm/Support/Casting.h" diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp index 19ec281079cb..fea83e92de8f 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -40,6 +40,8 @@  #include "llvm/CodeGen/TargetLowering.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/DebugInfoMetadata.h"  #include "llvm/IR/DebugLoc.h"  #include "llvm/IR/Function.h" @@ -64,7 +66,7 @@ using namespace llvm;  STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); -// \brief If @MI is a DBG_VALUE with debug value described by a defined +// If @MI is a DBG_VALUE with debug value described by a defined  // register, returns the number of this register. In the other case, returns 0.  static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) {    assert(MI.isDebugValue() && "expected a DBG_VALUE"); @@ -81,6 +83,7 @@ private:    const TargetRegisterInfo *TRI;    const TargetInstrInfo *TII;    const TargetFrameLowering *TFI; +  BitVector CalleeSavedRegs;    LexicalScopes LS;    /// Keeps track of lexical scopes associated with a user value's source @@ -178,11 +181,11 @@ private:    using VarLocMap = UniqueVector<VarLoc>;    using VarLocSet = SparseBitVector<>;    using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>; -  struct SpillDebugPair { -    MachineInstr *SpillInst; +  struct TransferDebugPair { +    MachineInstr *TransferInst;      MachineInstr *DebugInst;    }; -  using SpillMap = SmallVector<SpillDebugPair, 4>; +  using TransferMap = SmallVector<TransferDebugPair, 4>;    /// This holds the working set of currently open ranges. 
For fast    /// access, this is done both as a set of VarLocIDs, and a map of @@ -235,18 +238,23 @@ private:    bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF,                            unsigned &Reg);    int extractSpillBaseRegAndOffset(const MachineInstr &MI, unsigned &Reg); +  void insertTransferDebugPair(MachineInstr &MI, OpenRangesSet &OpenRanges, +                               TransferMap &Transfers, VarLocMap &VarLocIDs, +                               unsigned OldVarID, unsigned NewReg = 0);    void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,                            VarLocMap &VarLocIDs);    void transferSpillInst(MachineInstr &MI, OpenRangesSet &OpenRanges, -                         VarLocMap &VarLocIDs, SpillMap &Spills); +                         VarLocMap &VarLocIDs, TransferMap &Transfers); +  void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges, +                            VarLocMap &VarLocIDs, TransferMap &Transfers);    void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,                             const VarLocMap &VarLocIDs);    bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,                                VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); -  bool transfer(MachineInstr &MI, OpenRangesSet &OpenRanges, -                VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, SpillMap &Spills, -                bool transferSpills); +  bool process(MachineInstr &MI, OpenRangesSet &OpenRanges, +               VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, +               TransferMap &Transfers, bool transferChanges);    bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,              const VarLocMap &VarLocIDs, @@ -369,6 +377,54 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,    }  } +/// Create a new TransferDebugPair and insert it in \p Transfers. The VarLoc +/// with \p OldVarID should be deleted from \p OpenRanges and replaced with the +/// new VarLoc. If \p NewReg differs from the default zero value, the new +/// location will be the register location created by the copy-like +/// instruction; otherwise it is the variable's location on the stack. +void LiveDebugValues::insertTransferDebugPair( +    MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers, +    VarLocMap &VarLocIDs, unsigned OldVarID, unsigned NewReg) { +  const MachineInstr *DMI = &VarLocIDs[OldVarID].MI; +  MachineFunction *MF = MI.getParent()->getParent(); +  MachineInstr *NewDMI; +  if (NewReg) { +    // Create a DBG_VALUE instruction to describe the Var in its new +    // register location. +    NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), +                     DMI->isIndirectDebugValue(), NewReg, +                     DMI->getDebugVariable(), DMI->getDebugExpression()); +    if (DMI->isIndirectDebugValue()) +      NewDMI->getOperand(1).setImm(DMI->getOperand(1).getImm()); +    LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: "; +               NewDMI->print(dbgs(), false, false, false, TII)); +  } else { +    // Create a DBG_VALUE instruction to describe the Var in its spilled +    // location. 
+    unsigned SpillBase; +    int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase); +    auto *SpillExpr = DIExpression::prepend(DMI->getDebugExpression(), +                                            DIExpression::NoDeref, SpillOffset); +    NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase, +                     DMI->getDebugVariable(), SpillExpr); +    LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: "; +               NewDMI->print(dbgs(), false, false, false, TII)); +  } + +  // The newly created DBG_VALUE instruction NewDMI must be inserted after +  // MI. Keep track of the pairing. +  TransferDebugPair MIP = {&MI, NewDMI}; +  Transfers.push_back(MIP); + +  // End all previous ranges of Var. +  OpenRanges.erase(VarLocIDs[OldVarID].Var); + +  // Add the VarLoc to OpenRanges. +  VarLoc VL(*NewDMI, LS); +  unsigned LocID = VarLocIDs.insert(VL); +  OpenRanges.insert(LocID, VL.Var); +} +  /// A definition of a register may mark the end of a range.  void LiveDebugValues::transferRegisterDef(MachineInstr &MI,                                            OpenRangesSet &OpenRanges, @@ -426,28 +482,51 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,          FrameInfo.isSpillSlotObjectIndex(FI)))      return false; -  // In a spill instruction generated by the InlineSpiller the spilled register -  // has its kill flag set. Return false if we don't find such a register. -  Reg = 0; +  auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) { +    if (!MO.isReg() || !MO.isUse()) { +      Reg = 0; +      return false; +    } +    Reg = MO.getReg(); +    return MO.isKill(); +  }; +    for (const MachineOperand &MO : MI.operands()) { -    if (MO.isReg() && MO.isUse() && MO.isKill()) { -      Reg = MO.getReg(); -      break; +    // In a spill instruction generated by the InlineSpiller the spilled +    // register has its kill flag set. +    if (isKilledReg(MO, Reg)) +      return true; +    if (Reg != 0) { +      // Check whether next instruction kills the spilled register. +      // FIXME: Current solution does not cover search for killed register in +      // bundles and instructions further down the chain. +      auto NextI = std::next(MI.getIterator()); +      // Skip next instruction that points to basic block end iterator. +      if (MI.getParent()->end() == NextI) +        continue; +      unsigned RegNext; +      for (const MachineOperand &MONext : NextI->operands()) { +        // Return true if we came across the register from the +        // previous spill instruction that is killed in NextI. +        if (isKilledReg(MONext, RegNext) && RegNext == Reg) +          return true; +      }      }    } -  return Reg != 0; +  // Return false if we didn't find spilled register. +  return false;  }  /// A spilled register may indicate that we have to end the current range of  /// a variable and create a new one for the spill location. -/// We don't want to insert any instructions in transfer(), so we just create -/// the DBG_VALUE witout inserting it and keep track of it in @Spills. +/// We don't want to insert any instructions in process(), so we just create +/// the DBG_VALUE without inserting it and keep track of it in \p Transfers.  /// It will be inserted into the BB when we're done iterating over the  /// instructions.  
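The reworked kill-flag search above accepts a spill either when the stored register is marked killed on the store itself, or when the very next instruction kills it; the FIXME notes that bundles and longer chains are not searched. A standalone model of that two-step check (toy Inst/Operand types, not MachineInstr):

#include <vector>

struct Operand { bool IsRegUse; unsigned Reg; bool IsKill; };
struct Inst { std::vector<Operand> Ops; };

bool findSpilledReg(const std::vector<Inst> &Block, unsigned Pos,
                    unsigned &Reg) {
  auto isKilledReg = [](const Operand &MO, unsigned &R) {
    if (!MO.IsRegUse) { R = 0; return false; }
    R = MO.Reg;
    return MO.IsKill;
  };
  for (const Operand &MO : Block[Pos].Ops) {
    if (isKilledReg(MO, Reg))
      return true;                       // killed on the store: this is it
    if (Reg != 0 && Pos + 1 < Block.size()) {
      // Peek at the next instruction only; longer chains are not searched.
      unsigned RegNext;
      for (const Operand &MONext : Block[Pos + 1].Ops)
        if (isKilledReg(MONext, RegNext) && RegNext == Reg)
          return true;                   // killed by the next instruction
    }
  }
  return false;                          // no spilled register found
}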
void LiveDebugValues::transferSpillInst(MachineInstr &MI,                                          OpenRangesSet &OpenRanges,                                          VarLocMap &VarLocIDs, -                                        SpillMap &Spills) { +                                        TransferMap &Transfers) {    unsigned Reg;    MachineFunction *MF = MI.getMF();    if (!isSpillInstruction(MI, MF, Reg)) @@ -456,35 +535,49 @@ void LiveDebugValues::transferSpillInst(MachineInstr &MI,    // Check if the register is the location of a debug value.    for (unsigned ID : OpenRanges.getVarLocs()) {      if (VarLocIDs[ID].isDescribedByReg() == Reg) { -      DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '(' -                   << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); - -      // Create a DBG_VALUE instruction to describe the Var in its spilled -      // location, but don't insert it yet to avoid invalidating the -      // iterator in our caller. -      unsigned SpillBase; -      int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase); -      const MachineInstr *DMI = &VarLocIDs[ID].MI; -      auto *SpillExpr = DIExpression::prepend( -          DMI->getDebugExpression(), DIExpression::NoDeref, SpillOffset); -      MachineInstr *SpDMI = -          BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase, -                  DMI->getDebugVariable(), SpillExpr); -      DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: "; -            SpDMI->print(dbgs(), false, TII)); - -      // The newly created DBG_VALUE instruction SpDMI must be inserted after -      // MI. Keep track of the pairing. -      SpillDebugPair MIP = {&MI, SpDMI}; -      Spills.push_back(MIP); - -      // End all previous ranges of Var. -      OpenRanges.erase(VarLocIDs[ID].Var); - -      // Add the VarLoc to OpenRanges. -      VarLoc VL(*SpDMI, LS); -      unsigned SpillLocID = VarLocIDs.insert(VL); -      OpenRanges.insert(SpillLocID, VL.Var); +      LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '(' +                        << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); +      insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID); +      return; +    } +  } +} + +/// If \p MI is a register copy instruction, that copies a previously tracked +/// value from one register to another register that is callee saved, we +/// create new DBG_VALUE instruction  described with copy destination register. +void LiveDebugValues::transferRegisterCopy(MachineInstr &MI, +                                           OpenRangesSet &OpenRanges, +                                           VarLocMap &VarLocIDs, +                                           TransferMap &Transfers) { +  const MachineOperand *SrcRegOp, *DestRegOp; + +  if (!TII->isCopyInstr(MI, SrcRegOp, DestRegOp) || !SrcRegOp->isKill() || +      !DestRegOp->isDef()) +    return; + +  auto isCalleSavedReg = [&](unsigned Reg) { +    for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI) +      if (CalleeSavedRegs.test(*RAI)) +        return true; +    return false; +  }; + +  unsigned SrcReg = SrcRegOp->getReg(); +  unsigned DestReg = DestRegOp->getReg(); + +  // We want to recognize instructions where destination register is callee +  // saved register. If register that could be clobbered by the call is +  // included, there would be a great chance that it is going to be clobbered +  // soon. 
It is more likely that previous register location, which is callee +  // saved, is going to stay unclobbered longer, even if it is killed. +  if (!isCalleSavedReg(DestReg)) +    return; + +  for (unsigned ID : OpenRanges.getVarLocs()) { +    if (VarLocIDs[ID].isDescribedByReg() == SrcReg) { +      insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, +                              DestReg);        return;      }    } @@ -497,16 +590,18 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,                                               const VarLocMap &VarLocIDs) {    bool Changed = false;    const MachineBasicBlock *CurMBB = MI.getParent(); -  if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back()))) +  if (!(MI.isTerminator() || (&MI == &CurMBB->back())))      return false;    if (OpenRanges.empty())      return false; -  DEBUG(for (unsigned ID : OpenRanges.getVarLocs()) { -          // Copy OpenRanges to OutLocs, if not already present. -          dbgs() << "Add to OutLocs: "; VarLocIDs[ID].dump(); -        }); +  LLVM_DEBUG(for (unsigned ID +                  : OpenRanges.getVarLocs()) { +    // Copy OpenRanges to OutLocs, if not already present. +    dbgs() << "Add to OutLocs: "; +    VarLocIDs[ID].dump(); +  });    VarLocSet &VLS = OutLocs[CurMBB];    Changed = VLS |= OpenRanges.getVarLocs();    OpenRanges.clear(); @@ -514,14 +609,16 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,  }  /// This routine creates OpenRanges and OutLocs. -bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges, -                               VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, -                               SpillMap &Spills, bool transferSpills) { +bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, +                              VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, +                              TransferMap &Transfers, bool transferChanges) {    bool Changed = false;    transferDebugValue(MI, OpenRanges, VarLocIDs);    transferRegisterDef(MI, OpenRanges, VarLocIDs); -  if (transferSpills) -    transferSpillInst(MI, OpenRanges, VarLocIDs, Spills); +  if (transferChanges) { +    transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); +    transferSpillInst(MI, OpenRanges, VarLocIDs, Transfers); +  }    Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);    return Changed;  } @@ -532,7 +629,7 @@ bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,  bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,                             VarLocInMBB &InLocs, const VarLocMap &VarLocIDs,                             SmallPtrSet<const MachineBasicBlock *, 16> &Visited) { -  DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n"); +  LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");    bool Changed = false;    VarLocSet InLocsT; // Temporary incoming locations. @@ -583,7 +680,7 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,    for (auto ID : Diff) {      // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a      // new range is started for the var from the mbb's beginning by inserting -    // a new DBG_VALUE. transfer() will end this range however appropriate. +    // a new DBG_VALUE. process() will end this range however appropriate.      
const VarLoc &DiffIt = VarLocIDs[ID];      const MachineInstr *DMI = &DiffIt.MI;      MachineInstr *MI = @@ -592,7 +689,7 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,                  DMI->getDebugVariable(), DMI->getDebugExpression());      if (DMI->isIndirectDebugValue())        MI->getOperand(1).setImm(DMI->getOperand(1).getImm()); -    DEBUG(dbgs() << "Inserted: "; MI->dump();); +    LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump(););      ILS.set(ID);      ++NumInserted;      Changed = true; @@ -603,7 +700,7 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,  /// Calculate the liveness information for the given machine function and  /// extend ranges across basic blocks.  bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { -  DEBUG(dbgs() << "\nDebug Range Extension\n"); +  LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");    bool Changed = false;    bool OLChanged = false; @@ -613,7 +710,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {    OpenRangesSet OpenRanges; // Ranges that are open until end of bb.    VarLocInMBB OutLocs;      // Ranges that exist beyond bb.    VarLocInMBB InLocs;       // Ranges that are incoming after joining. -  SpillMap Spills;          // DBG_VALUEs associated with spills. +  TransferMap Transfers;    // DBG_VALUEs associated with spills.    DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;    DenseMap<MachineBasicBlock *, unsigned int> BBToOrder; @@ -624,6 +721,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {                        std::greater<unsigned int>>        Pending; +  enum : bool { dontTransferChanges = false, transferChanges = true }; +    // Initialize every mbb with OutLocs.    // We are not looking at any spill instructions during the initial pass    // over the BBs. The LiveDebugVariables pass has already created DBG_VALUE @@ -631,11 +730,11 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {    // within the BB in which the spill occurs.    for (auto &MBB : MF)      for (auto &MI : MBB) -      transfer(MI, OpenRanges, OutLocs, VarLocIDs, Spills, -               /*transferSpills=*/false); +      process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, +              dontTransferChanges); -  DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after initialization", -                         dbgs())); +  LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, +                              "OutLocs after initialization", dbgs()));    ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);    unsigned int RPONumber = 0; @@ -646,7 +745,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {      ++RPONumber;    }    // This is a standard "union of predecessor outs" dataflow problem. -  // To solve it, we perform join() and transfer() using the two worklist method +  // To solve it, we perform join() and process() using the two worklist method    // until the ranges converge.    // Ranges have converged when both worklists are empty.    SmallPtrSet<const MachineBasicBlock *, 16> Visited; @@ -655,7 +754,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {      // thing twice.  We could avoid this with a custom priority queue, but this      // is probably not worth it.      
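The surrounding loop is the classic two-worklist dataflow iteration: drain the current worklist, queue any successor whose input may have changed on the pending list, then swap. A compact sketch of that convergence loop with plain sets (toy types; transfer is modeled as the identity, and the meet is intersection since a location is live-in only when it is live-out of every predecessor):

#include <map>
#include <queue>
#include <set>
#include <vector>

struct Block { std::vector<Block *> Preds, Succs; };
using LocSet = std::set<int>;

void solve(const std::vector<Block *> &RPO, std::map<Block *, LocSet> &Out) {
  std::queue<Block *> Worklist, Pending;
  for (Block *B : RPO)
    Worklist.push(B);
  while (!Worklist.empty()) {
    std::set<Block *> OnPending; // queue each block at most once per round
    while (!Worklist.empty()) {
      Block *B = Worklist.front();
      Worklist.pop();
      // join(): intersect predecessor out-sets.
      LocSet In;
      bool First = true;
      for (Block *P : B->Preds) {
        if (First) { In = Out[P]; First = false; continue; }
        LocSet Tmp;
        for (int V : In)
          if (Out[P].count(V))
            Tmp.insert(V);
        In.swap(Tmp);
      }
      if (Out[B] != In) {        // out-set changed: revisit successors
        Out[B] = In;
        for (Block *S : B->Succs)
          if (OnPending.insert(S).second)
            Pending.push(S);
      }
    }
    std::swap(Worklist, Pending); // next round; empty means convergence
  }
}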
SmallPtrSet<MachineBasicBlock *, 16> OnPending; -    DEBUG(dbgs() << "Processing Worklist\n"); +    LLVM_DEBUG(dbgs() << "Processing Worklist\n");      while (!Worklist.empty()) {        MachineBasicBlock *MBB = OrderToBB[Worklist.top()];        Worklist.pop(); @@ -668,19 +767,19 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {          // examine spill instructions to see whether they spill registers that          // correspond to user variables.          for (auto &MI : *MBB) -          OLChanged |= transfer(MI, OpenRanges, OutLocs, VarLocIDs, Spills, -                                /*transferSpills=*/true); +          OLChanged |= process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, +                               transferChanges);          // Add any DBG_VALUE instructions necessitated by spills. -        for (auto &SP : Spills) -          MBB->insertAfter(MachineBasicBlock::iterator(*SP.SpillInst), -                           SP.DebugInst); -        Spills.clear(); +        for (auto &TR : Transfers) +          MBB->insertAfter(MachineBasicBlock::iterator(*TR.TransferInst), +                           TR.DebugInst); +        Transfers.clear(); -        DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, -                               "OutLocs after propagating", dbgs())); -        DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, -                               "InLocs after propagating", dbgs())); +        LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, +                                    "OutLocs after propagating", dbgs())); +        LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, +                                    "InLocs after propagating", dbgs()));          if (OLChanged) {            OLChanged = false; @@ -697,8 +796,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {      assert(Pending.empty() && "Pending should be empty");    } -  DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs())); -  DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs())); +  LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs())); +  LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs()));    return Changed;  } @@ -715,6 +814,8 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {    TRI = MF.getSubtarget().getRegisterInfo();    TII = MF.getSubtarget().getInstrInfo();    TFI = MF.getSubtarget().getFrameLowering(); +  TFI->determineCalleeSaves(MF, CalleeSavedRegs, +                            make_unique<RegScavenger>().get());    LS.initialize(MF);    bool Changed = ExtendRanges(MF); diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 4ffcffcea693..3ff03ec4a7ee 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -44,6 +44,7 @@  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/DebugInfoMetadata.h"  #include "llvm/IR/DebugLoc.h"  #include "llvm/IR/Function.h" @@ -223,7 +224,12 @@ public:      return L1;    } -  /// getLocationNo - Return the location number that matches Loc. +  /// Return the location number that matches Loc. +  /// +  /// For undef values we always return location number UndefLocNo without +  /// inserting anything in locations. 
Since locations is a vector and the +  /// location number is the position in the vector and UndefLocNo is ~0, +  /// we would need a very big vector to put the value at the right position.    unsigned getLocationNo(const MachineOperand &LocMO) {      if (LocMO.isReg()) {        if (LocMO.getReg() == 0) @@ -301,7 +307,7 @@ public:    /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is    /// live. Returns true if any changes were made. -  bool splitRegister(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, +  bool splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,                       LiveIntervals &LIS);    /// rewriteLocations - Rewrite virtual register locations according to the @@ -510,7 +516,7 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {    if (MI.getNumOperands() != 4 ||        !(MI.getOperand(1).isReg() || MI.getOperand(1).isImm()) ||        !MI.getOperand(2).isMetadata()) { -    DEBUG(dbgs() << "Can't handle " << MI); +    LLVM_DEBUG(dbgs() << "Can't handle " << MI);      return false;    } @@ -529,8 +535,8 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {        // The DBG_VALUE is described by a virtual register that does not have a        // live interval. Discard the DBG_VALUE.        Discard = true; -      DEBUG(dbgs() << "Discarding debug info (no LIS interval): " -            << Idx << " " << MI); +      LLVM_DEBUG(dbgs() << "Discarding debug info (no LIS interval): " << Idx +                        << " " << MI);      } else {        // The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg        // is defined dead at Idx (where Idx is the slot index for the instruction @@ -541,8 +547,8 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {          // We have found a DBG_VALUE with the value in a virtual register that          // is not live. Discard the DBG_VALUE.          Discard = true; -        DEBUG(dbgs() << "Discarding debug info (reg not live): " -              << Idx << " " << MI); +        LLVM_DEBUG(dbgs() << "Discarding debug info (reg not live): " << Idx +                          << " " << MI);        }      }    } @@ -687,7 +693,8 @@ void UserValue::addDefsFromCopies(    if (CopyValues.empty())      return; -  DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n'); +  LLVM_DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI +                    << '\n');    // Try to add defs of the copied values for each kill point.    for (unsigned i = 0, e = Kills.size(); i != e; ++i) { @@ -701,8 +708,8 @@ void UserValue::addDefsFromCopies(        LocMap::iterator I = locInts.find(Idx);        if (I.valid() && I.start() <= Idx)          continue; -      DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #" -                   << DstVNI->id << " in " << *DstLI << '\n'); +      LLVM_DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #" +                        << DstVNI->id << " in " << *DstLI << '\n');        MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);        assert(CopyMI && CopyMI->isCopy() && "Bad copy value");        unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); @@ -759,13 +766,6 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,      // function).    } -  // Erase all the undefs. 
-  for (LocMap::iterator I = locInts.begin(); I.valid();) -    if (I.value().isUndef()) -      I.erase(); -    else -      ++I; -    // The computed intervals may extend beyond the range of the debug    // location's lexical scope. In this case, splitting of an interval    // can result in an interval outside of the scope being created, @@ -850,12 +850,12 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {    MF = &mf;    LIS = &pass.getAnalysis<LiveIntervals>();    TRI = mf.getSubtarget().getRegisterInfo(); -  DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " -               << mf.getName() << " **********\n"); +  LLVM_DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " +                    << mf.getName() << " **********\n");    bool Changed = collectDebugValues(mf);    computeIntervals(); -  DEBUG(print(dbgs())); +  LLVM_DEBUG(print(dbgs()));    ModifiedMF = Changed;    return Changed;  } @@ -901,7 +901,7 @@ LiveDebugVariables::~LiveDebugVariables() {  bool  UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,                           LiveIntervals& LIS) { -  DEBUG({ +  LLVM_DEBUG({      dbgs() << "Splitting Loc" << OldLocNo << '\t';      print(dbgs(), nullptr);    }); @@ -984,17 +984,22 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,    while (LocMapI.valid()) {      DbgValueLocation v = LocMapI.value();      if (v.locNo() == OldLocNo) { -      DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';' -                   << LocMapI.stop() << ")\n"); +      LLVM_DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';' +                        << LocMapI.stop() << ")\n");        LocMapI.erase();      } else { -      if (v.locNo() > OldLocNo) +      // Undef values always have location number UndefLocNo, so don't change +      // locNo in that case. See getLocationNo(). +      if (!v.isUndef() && v.locNo() > OldLocNo)          LocMapI.setValueUnchecked(v.changeLocNo(v.locNo() - 1));        ++LocMapI;      }    } -  DEBUG({dbgs() << "Split result: \t"; print(dbgs(), nullptr);}); +  LLVM_DEBUG({ +    dbgs() << "Split result: \t"; +    print(dbgs(), nullptr); +  });    return DidChange;  } @@ -1094,6 +1099,10 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI,    // physical register.    for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {      DbgValueLocation Loc = I.value(); +    // Undef values don't exist in locations (and thus not in LocNoMap either) +    // so skip over them. See getLocationNo(). +    if (Loc.isUndef()) +      continue;      unsigned NewLocNo = LocNoMap[Loc.locNo()];      I.setValueUnchecked(Loc.changeLocNo(NewLocNo));      I.setStart(I.start()); @@ -1136,7 +1145,7 @@ findNextInsertLocation(MachineBasicBlock *MBB,    unsigned Reg = LocMO.getReg();    // Find the next instruction in the MBB that define the register Reg. -  while (I != MBB->end()) { +  while (I != MBB->end() && !I->isTerminator()) {      if (!LIS.isNotInMIMap(*I) &&          SlotIndex::isEarlierEqualInstr(StopIdx, LIS.getInstructionIndex(*I)))        break; @@ -1158,7 +1167,15 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,    // Only search within the current MBB.    StopIdx = (MBBEndIdx < StopIdx) ? MBBEndIdx : StopIdx;    MachineBasicBlock::iterator I = findInsertLocation(MBB, StartIdx, LIS); -  MachineOperand &MO = locations[Loc.locNo()]; +  // Undef values don't exist in locations so create new "noreg" register MOs +  // for them. See getLocationNo(). 
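Several hunks above special-case undef debug values: their location number is the sentinel UndefLocNo (~0), they are never stored in the locations vector, and renumbering or rewriting passes must skip them. A small sketch of that sentinel discipline (hypothetical names, not the LLVM classes; the real getLocationNo also reuses an existing entry instead of always appending):

#include <vector>

constexpr unsigned UndefLocNo = ~0u; // sentinel: never stored in Locations

struct UserValueModel {
  std::vector<unsigned> Locations; // register numbers, by location index

  unsigned getLocationNo(bool IsUndef, unsigned Reg) {
    if (IsUndef)
      return UndefLocNo;         // no entry is materialized for undef
    Locations.push_back(Reg);
    return Locations.size() - 1;
  }

  // Mirrors the renumbering fix in splitLocation(): only real location
  // numbers shift down when an earlier entry is erased.
  static unsigned shiftAfterErase(unsigned LocNo, unsigned ErasedLocNo) {
    if (LocNo != UndefLocNo && LocNo > ErasedLocNo)
      return LocNo - 1;
    return LocNo;
  }
};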
+  MachineOperand MO = !Loc.isUndef() ? +    locations[Loc.locNo()] : +    MachineOperand::CreateReg(/* Reg */ 0, /* isDef */ false, /* isImp */ false, +                              /* isKill */ false, /* isDead */ false, +                              /* isUndef */ false, /* isEarlyClobber */ false, +                              /* SubReg */ 0, /* isDebug */ true); +    ++NumInsertedDebugValues;    assert(cast<DILocalVariable>(Variable) @@ -1179,14 +1196,8 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,    assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index");    do { -    MachineInstrBuilder MIB = -      BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) -          .add(MO); -    if (IsIndirect) -      MIB.addImm(0U); -    else -      MIB.addReg(0U, RegState::Debug); -    MIB.addMetadata(Variable).addMetadata(Expr); +    BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), +            IsIndirect, MO, Variable, Expr);      // Continue and insert DBG_VALUES after every redefinition of register      // associated with the debug value within the range @@ -1212,11 +1223,11 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,      if (trimmedDefs.count(Start))        Start = Start.getPrevIndex(); -    DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo()); +    LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo());      MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();      SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); -    DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); +    LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);      insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);      // This interval may span multiple basic blocks.      // Insert a DBG_VALUE into each one. 
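emitDebugValues above walks each [Start;Stop) interval and, when the interval crosses a block boundary, emits one DBG_VALUE per block it spans. The block-splitting loop reduces to this shape (toy index type; assumes BlockStart is sorted and covers Start):

#include <algorithm>
#include <cstdio>
#include <vector>

// Block i covers [BlockStart[i], BlockStart[i+1]).
void emitPerBlock(unsigned Start, unsigned Stop,
                  const std::vector<unsigned> &BlockStart) {
  // Find the block containing Start.
  size_t MBB = std::upper_bound(BlockStart.begin(), BlockStart.end(), Start) -
               BlockStart.begin() - 1;
  while (true) {
    unsigned MBBEnd =
        (MBB + 1 < BlockStart.size()) ? BlockStart[MBB + 1] : ~0u;
    // One DBG_VALUE per block the interval touches.
    std::printf("insert at block %zu for [%u;%u)\n", MBB,
                std::max(Start, BlockStart[MBB]), std::min(Stop, MBBEnd));
    if (Stop <= MBBEnd)
      break;  // interval ends inside this block
    ++MBB;    // interval spans into the next block; continue there
  }
}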
@@ -1226,10 +1237,10 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,        if (++MBB == MFEnd)          break;        MBBEnd = LIS.getMBBEndIdx(&*MBB); -      DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); +      LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd);        insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI);      } -    DEBUG(dbgs() << '\n'); +    LLVM_DEBUG(dbgs() << '\n');      if (MBB == MFEnd)        break; @@ -1238,13 +1249,13 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,  }  void LDVImpl::emitDebugValues(VirtRegMap *VRM) { -  DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); +  LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");    if (!MF)      return;    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();    BitVector SpilledLocations;    for (unsigned i = 0, e = userValues.size(); i != e; ++i) { -    DEBUG(userValues[i]->print(dbgs(), TRI)); +    LLVM_DEBUG(userValues[i]->print(dbgs(), TRI));      userValues[i]->rewriteLocations(*VRM, *TRI, SpilledLocations);      userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpilledLocations);    } diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index 302c75133e35..83dd982587c6 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -33,6 +33,7 @@  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/SlotIndexes.h"  #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/MC/LaneBitmask.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" @@ -991,6 +992,7 @@ void LiveInterval::print(raw_ostream &OS) const {    // Print subranges    for (const SubRange &SR : subranges())      OS << SR; +  OS << " weight:" << weight;  }  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp index 3e742a6c2f21..36428e0335f9 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -187,7 +187,7 @@ void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,    clear();    Size = NSize;    LIUs = static_cast<LiveIntervalUnion*>( -    malloc(sizeof(LiveIntervalUnion)*NSize)); +      safe_malloc(sizeof(LiveIntervalUnion)*NSize));    for (unsigned i = 0; i != Size; ++i)      new(LIUs + i) LiveIntervalUnion(Alloc);  } diff --git a/contrib/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm/lib/CodeGen/LiveIntervals.cpp index 79fdba7e062a..471775f8706b 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervals.cpp @@ -37,6 +37,7 @@  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/MC/LaneBitmask.h"  #include "llvm/MC/MCRegisterInfo.h"  #include "llvm/Pass.h" @@ -147,7 +148,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {      for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)        getRegUnit(i);    } -  DEBUG(dump()); +  LLVM_DEBUG(dump());    return true;  } @@ -310,7 +311,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {  /// entering the entry block or a landing pad.  
void LiveIntervals::computeLiveInRegUnits() {    RegUnitRanges.resize(TRI->getNumRegUnits()); -  DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n"); +  LLVM_DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");    // Keep track of the live range sets allocated.    SmallVector<unsigned, 8> NewRanges; @@ -323,7 +324,7 @@ void LiveIntervals::computeLiveInRegUnits() {      // Create phi-defs at Begin for all live-in registers.      SlotIndex Begin = Indexes->getMBBStartIdx(&MBB); -    DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB)); +    LLVM_DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB));      for (const auto &LI : MBB.liveins()) {        for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) {          unsigned Unit = *Units; @@ -335,12 +336,12 @@ void LiveIntervals::computeLiveInRegUnits() {          }          VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator());          (void)VNI; -        DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << '#' << VNI->id); +        LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << '#' << VNI->id);        }      } -    DEBUG(dbgs() << '\n'); +    LLVM_DEBUG(dbgs() << '\n');    } -  DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n"); +  LLVM_DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n");    // Compute the 'normal' part of the ranges.    for (unsigned Unit : NewRanges) @@ -357,26 +358,40 @@ static void createSegmentsForValues(LiveRange &LR,    }  } -using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo*>, 16>; - -static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, -                                 ShrinkToUsesWorkList &WorkList, -                                 const LiveRange &OldRange) { +void LiveIntervals::extendSegmentsToUses(LiveRange &Segments, +                                         ShrinkToUsesWorkList &WorkList, +                                         unsigned Reg, LaneBitmask LaneMask) {    // Keep track of the PHIs that are in use.    SmallPtrSet<VNInfo*, 8> UsedPHIs;    // Blocks that have already been added to WorkList as live-out.    SmallPtrSet<const MachineBasicBlock*, 16> LiveOut; +  auto getSubRange = [](const LiveInterval &I, LaneBitmask M) +        -> const LiveRange& { +    if (M.none()) +      return I; +    for (const LiveInterval::SubRange &SR : I.subranges()) { +      if ((SR.LaneMask & M).any()) { +        assert(SR.LaneMask == M && "Expecting lane masks to match exactly"); +        return SR; +      } +    } +    llvm_unreachable("Subrange for mask not found"); +  }; + +  const LiveInterval &LI = getInterval(Reg); +  const LiveRange &OldRange = getSubRange(LI, LaneMask); +    // Extend intervals to reach all uses in WorkList.    while (!WorkList.empty()) {      SlotIndex Idx = WorkList.back().first;      VNInfo *VNI = WorkList.back().second;      WorkList.pop_back(); -    const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Idx.getPrevSlot()); -    SlotIndex BlockStart = Indexes.getMBBStartIdx(MBB); +    const MachineBasicBlock *MBB = Indexes->getMBBFromIndex(Idx.getPrevSlot()); +    SlotIndex BlockStart = Indexes->getMBBStartIdx(MBB);      // Extend the live range for VNI to be live at Idx. -    if (VNInfo *ExtVNI = LR.extendInBlock(BlockStart, Idx)) { +    if (VNInfo *ExtVNI = Segments.extendInBlock(BlockStart, Idx)) {        assert(ExtVNI == VNI && "Unexpected existing value number");        (void)ExtVNI;        // Is this a PHIDef we haven't seen before? 
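Before the diff resumes inside extendSegmentsToUses, a note on its overall shape. The function is now a LiveIntervals member so it can reach Indexes and MRI itself, and the getSubRange lambda picks which range old values are read from (the main range, or the subrange whose lane mask matches). The core is the classic backward-liveness worklist: pop a (use slot, value) pair; if the value is already live in the use's block, extend it in-block and stop; otherwise record the value live-in from the block start and enqueue each predecessor's block end. A graph-only model of that loop, with integer block numbers standing in for SlotIndexes (a sketch, not LLVM's API):

    #include <iostream>
    #include <map>
    #include <set>
    #include <vector>

    struct CFG { std::map<int, std::vector<int>> Preds; };

    // Every block that must mark the value live-in so a use in UseBlock can
    // see the def in DefBlock. The def block itself "extends in-block"
    // instead, mirroring LiveRange::extendInBlock succeeding there.
    std::set<int> liveInBlocks(const CFG &G, int DefBlock, int UseBlock) {
      std::set<int> LiveIn;
      std::vector<int> Work{UseBlock};
      while (!Work.empty()) {
        int B = Work.back();
        Work.pop_back();
        if (B == DefBlock)
          continue;                   // reached the def: this path is done
        if (!LiveIn.insert(B).second)
          continue;                   // already handled as live-in
        for (int P : G.Preds.at(B))   // value must be live-out of every pred
          Work.push_back(P);
      }
      return LiveIn;
    }

    int main() {
      CFG G;
      G.Preds = {{0, {}}, {1, {0}}, {2, {0}}, {3, {1, 2}}};
      for (int B : liveInBlocks(G, /*DefBlock=*/0, /*UseBlock=*/3))
        std::cout << "live-in: bb." << B << '\n'; // bb.1, bb.2, bb.3
    }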
@@ -387,7 +402,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,        for (const MachineBasicBlock *Pred : MBB->predecessors()) {          if (!LiveOut.insert(Pred).second)            continue; -        SlotIndex Stop = Indexes.getMBBEndIdx(Pred); +        SlotIndex Stop = Indexes->getMBBEndIdx(Pred);          // A predecessor is not required to have a live-out value for a PHI.          if (VNInfo *PVNI = OldRange.getVNInfoBefore(Stop))            WorkList.push_back(std::make_pair(Stop, PVNI)); @@ -396,24 +411,37 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,      }      // VNI is live-in to MBB. -    DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); -    LR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI)); +    LLVM_DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); +    Segments.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));      // Make sure VNI is live-out from the predecessors.      for (const MachineBasicBlock *Pred : MBB->predecessors()) {        if (!LiveOut.insert(Pred).second)          continue; -      SlotIndex Stop = Indexes.getMBBEndIdx(Pred); -      assert(OldRange.getVNInfoBefore(Stop) == VNI && -             "Wrong value out of predecessor"); -      WorkList.push_back(std::make_pair(Stop, VNI)); +      SlotIndex Stop = Indexes->getMBBEndIdx(Pred); +      if (VNInfo *OldVNI = OldRange.getVNInfoBefore(Stop)) { +        assert(OldVNI == VNI && "Wrong value out of predecessor"); +        (void)OldVNI; +        WorkList.push_back(std::make_pair(Stop, VNI)); +      } else { +#ifndef NDEBUG +        // There was no old VNI. Verify that Stop is jointly dominated +        // by <undef>s for this live range. +        assert(LaneMask.any() && +               "Missing value out of predecessor for main range"); +        SmallVector<SlotIndex,8> Undefs; +        LI.computeSubRangeUndefs(Undefs, LaneMask, *MRI, *Indexes); +        assert(LiveRangeCalc::isJointlyDominated(Pred, Undefs, *Indexes) && +               "Missing value out of predecessor for subrange"); +#endif +      }      }    }  }  bool LiveIntervals::shrinkToUses(LiveInterval *li,                                   SmallVectorImpl<MachineInstr*> *dead) { -  DEBUG(dbgs() << "Shrink: " << *li << '\n'); +  LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n');    assert(TargetRegisterInfo::isVirtualRegister(li->reg)           && "Can only shrink virtual registers"); @@ -442,9 +470,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,        // This shouldn't happen: readsVirtualRegister returns true, but there is        // no live value. It is likely caused by a target getting <undef> flags        // wrong. -      DEBUG(dbgs() << Idx << '\t' << UseMI -                   << "Warning: Instr claims to read non-existent value in " -                   << *li << '\n'); +      LLVM_DEBUG( +          dbgs() << Idx << '\t' << UseMI +                 << "Warning: Instr claims to read non-existent value in " +                 << *li << '\n');        continue;      }      // Special case: An early-clobber tied operand reads and writes the @@ -458,14 +487,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,    // Create new live ranges with only minimal live segments per def.    LiveRange NewLR;    createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end())); -  extendSegmentsToUses(NewLR, *Indexes, WorkList, *li); +  extendSegmentsToUses(NewLR, WorkList, Reg, LaneBitmask::getNone());    // Move the trimmed segments back.    
li->segments.swap(NewLR.segments);

   // Handle dead values.
   bool CanSeparate = computeDeadValues(*li, dead);
-  DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+  LLVM_DEBUG(dbgs() << "Shrunk: " << *li << '\n');
   return CanSeparate;
 }
@@ -495,7 +524,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
       // This is a dead PHI. Remove it.
       VNI->markUnused();
       LI.removeSegment(I);
-      DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
+      LLVM_DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
       MayHaveSplitComponents = true;
     } else {
       // This is a dead def. Make sure the instruction knows.
@@ -503,7 +532,7 @@
       assert(MI && "No instruction defining live value");
       MI->addRegisterDead(LI.reg, TRI);
       if (dead && MI->allDefsAreDead()) {
-        DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
+        LLVM_DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
         dead->push_back(MI);
       }
     }
@@ -512,7 +541,7 @@
 }

 void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
-  DEBUG(dbgs() << "Shrink: " << SR << '\n');
+  LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n');
   assert(TargetRegisterInfo::isVirtualRegister(Reg)
          && "Can only shrink virtual registers");
   // Find all the values used, including PHI kills.
@@ -556,7 +585,7 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
   // Create a new live range with only minimal live segments per def.
   LiveRange NewLR;
   createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end()));
-  extendSegmentsToUses(NewLR, *Indexes, WorkList, SR);
+  extendSegmentsToUses(NewLR, WorkList, Reg, SR.LaneMask);

   // Move the trimmed ranges back.
   SR.segments.swap(NewLR.segments);
@@ -571,13 +600,14 @@
       continue;
     if (VNI->isPHIDef()) {
       // This is a dead PHI. Remove it.
-      DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+      LLVM_DEBUG(dbgs() << "Dead PHI at " << VNI->def
+                        << " may separate interval\n");
       VNI->markUnused();
       SR.removeSegment(*Segment);
     }
   }

-  DEBUG(dbgs() << "Shrunk: " << SR << '\n');
+  LLVM_DEBUG(dbgs() << "Shrunk: " << SR << '\n');
 }

 void LiveIntervals::extendToIndices(LiveRange &LR,
@@ -785,7 +815,7 @@ MachineBasicBlock*
 LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
   // A local live range must be fully contained inside the block, meaning it is
   // defined and killed at instructions, not at block boundaries. It is not
-  // live in or or out of any block.
+  // live in or out of any block.
   //
   // It is technically possible to have a PHI-defined live range identical to a
   // single block, but we are going to return false in that case.
@@ -942,7 +972,8 @@ public:
   /// Update all live ranges touched by MI, assuming a move from OldIdx to
   /// NewIdx.
void updateAllRanges(MachineInstr *MI) { -    DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI); +    LLVM_DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " +                      << *MI);      bool hasRegMask = false;      for (MachineOperand &MO : MI->operands()) {        if (MO.isRegMask()) @@ -992,7 +1023,7 @@ private:    void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {      if (!Updated.insert(&LR).second)        return; -    DEBUG({ +    LLVM_DEBUG({        dbgs() << "     ";        if (TargetRegisterInfo::isVirtualRegister(Reg)) {          dbgs() << printReg(Reg); @@ -1007,7 +1038,7 @@ private:        handleMoveDown(LR);      else        handleMoveUp(LR, Reg, LaneMask); -    DEBUG(dbgs() << "        -->\t" << LR << '\n'); +    LLVM_DEBUG(dbgs() << "        -->\t" << LR << '\n');      LR.verify();    } @@ -1291,6 +1322,36 @@ private:            if (OldIdxIn != E && SlotIndex::isEarlierInstr(NewIdx, OldIdxIn->end))              OldIdxIn->end = NewIdx.getRegSlot();          } +      } else if (OldIdxIn != E +          && SlotIndex::isEarlierInstr(NewIdxOut->start, NewIdx) +          && SlotIndex::isEarlierInstr(NewIdx, NewIdxOut->end)) { +        // OldIdxVNI is a dead def that has been moved into the middle of +        // another value in LR. That can happen when LR is a whole register, +        // but the dead def is a write to a subreg that is dead at NewIdx. +        // The dead def may have been moved across other values +        // in LR, so move OldIdxOut up to NewIdxOut. Slide [NewIdxOut;OldIdxOut) +        // down one position. +        //    |- X0/NewIdxOut -| ... |- Xn-1 -| |- Xn/OldIdxOut -| |- next - | +        // => |- X0/NewIdxOut -| |- X0 -| ... |- Xn-1 -| |- next -| +        std::copy_backward(NewIdxOut, OldIdxOut, std::next(OldIdxOut)); +        // Modify the segment at NewIdxOut and the following segment to meet at +        // the point of the dead def, with the following segment getting +        // OldIdxVNI as its value number. +        *NewIdxOut = LiveRange::Segment( +            NewIdxOut->start, NewIdxDef.getRegSlot(), NewIdxOut->valno); +        *(NewIdxOut + 1) = LiveRange::Segment( +            NewIdxDef.getRegSlot(), (NewIdxOut + 1)->end, OldIdxVNI); +        OldIdxVNI->def = NewIdxDef; +        // Modify subsequent segments to be defined by the moved def OldIdxVNI. +        for (auto Idx = NewIdxOut + 2; Idx <= OldIdxOut; ++Idx) +          Idx->valno = OldIdxVNI; +        // Aggressively remove all dead flags from the former dead definition. +        // Kill/dead flags shouldn't be used while live intervals exist; they +        // will be reinserted by VirtRegRewriter. +        if (MachineInstr *KillMI = LIS.getInstructionFromIndex(NewIdx)) +          for (MIBundleOperands MO(*KillMI); MO.isValid(); ++MO) +            if (MO->isReg() && !MO->isUse()) +              MO->setIsDead(false);        } else {          // OldIdxVNI is a dead def. It may have been moved across other values          // in LR, so move OldIdxOut up to NewIdxOut. 
Slide [NewIdxOut;OldIdxOut) @@ -1360,7 +1421,7 @@ private:      MachineBasicBlock::iterator Begin = MBB->begin();      while (MII != Begin) { -      if ((--MII)->isDebugValue()) +      if ((--MII)->isDebugInstr())          continue;        SlotIndex Idx = Indexes->getInstructionIndex(*MII); @@ -1422,7 +1483,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,    for (MachineBasicBlock::iterator I = End; I != Begin;) {      --I;      MachineInstr &MI = *I; -    if (MI.isDebugValue()) +    if (MI.isDebugInstr())        continue;      SlotIndex instrIdx = getInstructionIndex(MI); @@ -1519,7 +1580,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,    for (MachineBasicBlock::iterator I = End; I != Begin;) {      --I;      MachineInstr &MI = *I; -    if (MI.isDebugValue()) +    if (MI.isDebugInstr())        continue;      for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),                                            MOE = MI.operands_end(); @@ -1580,7 +1641,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI,    unsigned NumComp = ConEQ.Classify(LI);    if (NumComp <= 1)      return; -  DEBUG(dbgs() << "  Split " << NumComp << " components: " << LI << '\n'); +  LLVM_DEBUG(dbgs() << "  Split " << NumComp << " components: " << LI << '\n');    unsigned Reg = LI.reg;    const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);    for (unsigned I = 1; I < NumComp; ++I) { diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp index 277212cf7dac..86c6c8e29f9a 100644 --- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -18,12 +18,13 @@  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineInstrBundle.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h"  using namespace llvm; -/// \brief Remove all registers from the set that get clobbered by the register +/// Remove all registers from the set that get clobbered by the register  /// mask.  /// The clobbers set will be the list of live registers clobbered  /// by the regmask. @@ -44,7 +45,7 @@ void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,  void LivePhysRegs::removeDefs(const MachineInstr &MI) {    for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {      if (O->isReg()) { -      if (!O->isDef()) +      if (!O->isDef() || O->isDebug())          continue;        unsigned Reg = O->getReg();        if (!TargetRegisterInfo::isPhysicalRegister(Reg)) @@ -58,7 +59,7 @@ void LivePhysRegs::removeDefs(const MachineInstr &MI) {  /// Add uses to the set.  void LivePhysRegs::addUses(const MachineInstr &MI) {    for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { -    if (!O->isReg() || !O->readsReg()) +    if (!O->isReg() || !O->readsReg() || O->isDebug())        continue;      unsigned Reg = O->getReg();      if (!TargetRegisterInfo::isPhysicalRegister(Reg)) @@ -85,7 +86,7 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,          SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) {    // Remove killed registers from the set.    
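A short aside on the handleMoveUp case added above: when a dead subregister def is moved into the middle of another value, the segment list is compacted with std::copy_backward, which shifts [NewIdxOut, OldIdxOut) one slot to the right before the two boundary segments are rewritten. The overlap-safe right-to-left copy is easiest to see on a plain vector (a toy model, not LiveRange segments):

    #include <algorithm>
    #include <iostream>
    #include <iterator>
    #include <vector>

    int main() {
      // 'D' plays the moved dead-def segment (OldIdxOut); position 1 is
      // where it must land (NewIdxOut). copy_backward copies right-to-left,
      // so the overlapping source and destination ranges never trample
      // each other.
      std::vector<char> Segs = {'A', 'B', 'C', 'D', 'E'};
      auto NewIdxOut = Segs.begin() + 1;
      auto OldIdxOut = Segs.begin() + 3;
      char Moved = *OldIdxOut;
      std::copy_backward(NewIdxOut, OldIdxOut, std::next(OldIdxOut));
      *NewIdxOut = Moved;
      for (char C : Segs)
        std::cout << C;             // prints ADBCE
      std::cout << '\n';
    }

The diff now returns to LivePhysRegs::stepForward, whose kill-removal loop follows the comment above.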
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
-    if (O->isReg()) {
+    if (O->isReg() && !O->isDebug()) {
       unsigned Reg = O->getReg();
       if (!TargetRegisterInfo::isPhysicalRegister(Reg))
         continue;
@@ -105,9 +106,13 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,

   // Add defs to the set.
   for (auto Reg : Clobbers) {
-    // Skip dead defs.  They shouldn't be added to the set.
+    // Skip dead defs and registers clobbered by regmasks. They shouldn't
+    // be added to the set.
     if (Reg.second->isReg() && Reg.second->isDead())
       continue;
+    if (Reg.second->isRegMask() &&
+        MachineOperand::clobbersPhysReg(Reg.second->getRegMask(), Reg.first))
+      continue;
     addReg(Reg.first);
   }
 }
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 66c23b7b69ce..04324943dfad 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -584,3 +584,24 @@ void LiveRangeCalc::updateSSA() {
     }
   } while (Changed);
 }
+
+bool LiveRangeCalc::isJointlyDominated(const MachineBasicBlock *MBB,
+                                       ArrayRef<SlotIndex> Defs,
+                                       const SlotIndexes &Indexes) {
+  const MachineFunction &MF = *MBB->getParent();
+  BitVector DefBlocks(MF.getNumBlockIDs());
+  for (SlotIndex I : Defs)
+    DefBlocks.set(Indexes.getMBBFromIndex(I)->getNumber());
+
+  SetVector<unsigned> PredQueue;
+  PredQueue.insert(MBB->getNumber());
+  for (unsigned i = 0; i != PredQueue.size(); ++i) {
+    unsigned BN = PredQueue[i];
+    if (DefBlocks[BN])
+      return true;
+    const MachineBasicBlock *B = MF.getBlockNumbered(BN);
+    for (const MachineBasicBlock *P : B->predecessors())
+      PredQueue.insert(P->getNumber());
+  }
+  return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
index c4914f23f56d..9f226b154a67 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -147,7 +147,7 @@ class LiveRangeCalc {
   ///
   /// PhysReg, when set, is used to verify live-in lists on basic blocks.
   bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
-                        SlotIndex Kill, unsigned PhysReg,
+                        SlotIndex Use, unsigned PhysReg,
                         ArrayRef<SlotIndex> Undefs);

   /// updateSSA - Compute the values that will be live in to all requested
@@ -282,6 +282,15 @@ public:
   /// Every predecessor of a live-in block must have been given a value with
   /// setLiveOutValue, the value may be null for live-through blocks.
   void calculateValues();
+
+  /// A diagnostic function to check if the end of the block @p MBB is
+  /// jointly dominated by the blocks corresponding to the slot indices
+  /// in @p Defs. This function is mainly for use in self-verification
+  /// checks.
+  LLVM_ATTRIBUTE_UNUSED +  static bool isJointlyDominated(const MachineBasicBlock *MBB, +                                 ArrayRef<SlotIndex> Defs, +                                 const SlotIndexes &Indexes);  };  } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index 86cfbd87f5b1..8dfe8b68c3af 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -31,21 +31,24 @@ STATISTIC(NumFracRanges,     "Number of live ranges fractured by DCE");  void LiveRangeEdit::Delegate::anchor() { } -LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) { +LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg, +                                                     bool createSubRanges) {    unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); -  if (VRM) { +  if (VRM)      VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); -  } +    LiveInterval &LI = LIS.createEmptyInterval(VReg);    if (Parent && !Parent->isSpillable())      LI.markNotSpillable(); -  // Create empty subranges if the OldReg's interval has them. Do not create -  // the main range here---it will be constructed later after the subranges -  // have been finalized. -  LiveInterval &OldLI = LIS.getInterval(OldReg); -  VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator(); -  for (LiveInterval::SubRange &S : OldLI.subranges()) -    LI.createSubRange(Alloc, S.LaneMask); +  if (createSubRanges) { +    // Create empty subranges if the OldReg's interval has them. Do not create +    // the main range here---it will be constructed later after the subranges +    // have been finalized. +    LiveInterval &OldLI = LIS.getInterval(OldReg); +    VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator(); +    for (LiveInterval::SubRange &S : OldLI.subranges()) +      LI.createSubRange(Alloc, S.LaneMask); +  }    return LI;  } @@ -217,8 +220,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,    if (!DefMI->isSafeToMove(nullptr, SawStore))      return false; -  DEBUG(dbgs() << "Try to fold single def: " << *DefMI -               << "       into single use: " << *UseMI); +  LLVM_DEBUG(dbgs() << "Try to fold single def: " << *DefMI +                    << "       into single use: " << *UseMI);    SmallVector<unsigned, 8> Ops;    if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second) @@ -227,7 +230,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,    MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS);    if (!FoldMI)      return false; -  DEBUG(dbgs() << "                folded: " << *FoldMI); +  LLVM_DEBUG(dbgs() << "                folded: " << *FoldMI);    LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI);    UseMI->eraseFromParent();    DefMI->addRegisterDead(LI->reg, nullptr); @@ -264,18 +267,18 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,    }    // Never delete inline asm.    if (MI->isInlineAsm()) { -    DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); +    LLVM_DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);      return;    }    // Use the same criteria as DeadMachineInstructionElim.    
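Backing up to LiveRangeCalc for a moment: isJointlyDominated, added above, is deliberately a debug-only sanity check (note the LLVM_ATTRIBUTE_UNUSED and its use under #ifndef NDEBUG earlier). Starting from the block in question, it walks backwards through predecessors until it meets a block holding one of the <undef> defs, reporting failure only if no def block is reachable at all. The same walk on a plain adjacency list, with a worked true/false pair (block numbering here is assumed for illustration):

    #include <iostream>
    #include <set>
    #include <vector>

    struct Graph { std::vector<std::vector<int>> Preds; };

    bool reachesADefBlock(const Graph &G, int Start,
                          const std::set<int> &Defs) {
      std::set<int> Seen{Start};
      std::vector<int> Work{Start};
      while (!Work.empty()) {
        int B = Work.back();
        Work.pop_back();
        if (Defs.count(B))
          return true;              // this backward path hits a def
        for (int P : G.Preds[B])
          if (Seen.insert(P).second)
            Work.push_back(P);
      }
      return false;                 // entry reached with no def anywhere
    }

    int main() {
      // bb0 -> bb1 -> bb3 and bb0 -> bb2 -> bb3
      Graph G{{{}, {0}, {0}, {1, 2}}};
      std::cout << reachesADefBlock(G, 3, {1}) << '\n'; // 1: def in bb1
      std::cout << reachesADefBlock(G, 3, {4}) << '\n'; // 0: none reachable
    }

LiveRangeEdit::eliminateDeadDef now resumes below, applying the DeadMachineInstructionElim criteria named in the comment above.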
bool SawStore = false;    if (!MI->isSafeToMove(nullptr, SawStore)) { -    DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); +    LLVM_DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);      return;    } -  DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); +  LLVM_DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);    // Collect virtual registers to be erased after MI is gone.    SmallVector<unsigned, 8> RegsToErase; @@ -349,7 +352,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,          continue;        MI->RemoveOperand(i-1);      } -    DEBUG(dbgs() << "Converted physregs to:\t" << *MI); +    LLVM_DEBUG(dbgs() << "Converted physregs to:\t" << *MI);    } else {      // If the dest of MI is an original reg and MI is reMaterializable,      // don't delete the inst. Replace the dest with a new reg, and keep @@ -357,12 +360,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,      // LiveRangeEdit::DeadRemats and will be deleted after all the      // allocations of the func are done.      if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) { -      LiveInterval &NewLI = createEmptyIntervalFrom(Dest); -      NewLI.removeEmptySubRanges(); +      LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false);        VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());        NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));        pop_back(); -      markDeadRemat(MI); +      DeadRemats->insert(MI);        const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();        MI->substituteRegister(Dest, NewLI.reg, 0, TRI);        MI->getOperand(0).setIsDead(true); @@ -463,7 +465,7 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,    for (unsigned I = 0, Size = size(); I < Size; ++I) {      LiveInterval &LI = LIS.getInterval(get(I));      if (MRI.recomputeRegClass(LI.reg)) -      DEBUG({ +      LLVM_DEBUG({          const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();          dbgs() << "Inflated " << printReg(LI.reg) << " to "                 << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n'; diff --git a/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp index 02e1f3b01ade..f75d513c89f5 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeShrink.cpp @@ -111,7 +111,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {    MachineRegisterInfo &MRI = MF.getRegInfo(); -  DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');    InstOrderMap IOM;    // Map from register to instruction order (value of IOM) where the @@ -130,7 +130,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {      for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {        MachineInstr &MI = *Next;        ++Next; -      if (MI.isPHI() || MI.isDebugValue()) +      if (MI.isPHI() || MI.isDebugInstr())          continue;        if (MI.mayStore())          SawStore = true; @@ -218,7 +218,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {        if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) {          MachineBasicBlock::iterator I = std::next(Insert->getIterator());          // Skip all the PHI and debug instructions. 
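The PHI/debug skip touched just below is an instance of a change this commit makes all over the tree: isDebugValue() becomes isDebugInstr(), which (as of this LLVM vintage, to my understanding) also matches debug instructions other than DBG_VALUE, such as DBG_LABEL. The loop's shape, on a toy instruction type:

    #include <iostream>
    #include <vector>

    struct Instr {
      bool IsPHI = false;
      bool IsDebug = false; // models isDebugInstr(): DBG_VALUE, DBG_LABEL, ...
    };

    using InstrIt = std::vector<Instr>::iterator;

    // Advance past leading PHIs and debug instructions, as the loop
    // below does.
    InstrIt skipPHIsAndDebug(InstrIt I, InstrIt End) {
      while (I != End && (I->IsPHI || I->IsDebug))
        ++I;
      return I;
    }

    int main() {
      std::vector<Instr> MBB = {{true, false}, {false, true}, {false, false}};
      std::cout << (skipPHIsAndDebug(MBB.begin(), MBB.end()) - MBB.begin())
                << '\n'; // 2: index of the first real instruction
    }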
-        while (I != MBB.end() && (I->isPHI() || I->isDebugValue())) +        while (I != MBB.end() && (I->isPHI() || I->isDebugInstr()))            I = std::next(I);          if (I == MI.getIterator())            continue; diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp index bd435968296d..e72977b02675 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -102,37 +102,37 @@ static bool foreachUnit(const TargetRegisterInfo *TRI,  }  void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { -  DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI) -               << " to " << printReg(PhysReg, TRI) << ':'); +  LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI) << " to " +                    << printReg(PhysReg, TRI) << ':');    assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");    VRM->assignVirt2Phys(VirtReg.reg, PhysReg); -  foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, -                                         const LiveRange &Range) { -    DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << ' ' << Range); -    Matrix[Unit].unify(VirtReg, Range); -    return false; -  }); +  foreachUnit( +      TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { +        LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI) << ' ' << Range); +        Matrix[Unit].unify(VirtReg, Range); +        return false; +      });    ++NumAssigned; -  DEBUG(dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << '\n');  }  void LiveRegMatrix::unassign(LiveInterval &VirtReg) {    unsigned PhysReg = VRM->getPhys(VirtReg.reg); -  DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) -               << " from " << printReg(PhysReg, TRI) << ':'); +  LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) << " from " +                    << printReg(PhysReg, TRI) << ':');    VRM->clearVirt(VirtReg.reg); -  foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, -                                         const LiveRange &Range) { -    DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI)); -    Matrix[Unit].extract(VirtReg, Range); -    return false; -  }); +  foreachUnit(TRI, VirtReg, PhysReg, +              [&](unsigned Unit, const LiveRange &Range) { +                LLVM_DEBUG(dbgs() << ' ' << printRegUnit(Unit, TRI)); +                Matrix[Unit].extract(VirtReg, Range); +                return false; +              });    ++NumUnassigned; -  DEBUG(dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << '\n');  }  bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const { @@ -205,3 +205,19 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {    return IK_Free;  } + +bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End, +                                      unsigned PhysReg) { +  // Construct artificial live range containing only one segment [Start, End). 
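The new LiveRegMatrix overload started above answers a narrower question than the interval-based checkInterference: is PhysReg free over a single window [Start, End)? It fakes up a one-segment LiveRange on the stack (a VNInfo plus one Segment) purely so it can reuse the existing query machinery. Stripped of LLVM types, the check is plain half-open interval overlap per register unit (a sketch under simplified types):

    #include <iostream>
    #include <utility>
    #include <vector>

    using Seg = std::pair<int, int>; // half-open [start, end)

    // Does the probe window overlap any segment already on this unit?
    bool interferes(const std::vector<Seg> &UnitSegs, Seg Probe) {
      for (Seg S : UnitSegs)
        if (S.first < Probe.second && Probe.first < S.second)
          return true;
      return false;
    }

    int main() {
      std::vector<Seg> Unit = {{0, 4}, {10, 14}};
      std::cout << interferes(Unit, {4, 10}) << '\n'; // 0: fits the gap
      std::cout << interferes(Unit, {3, 5}) << '\n';  // 1: clips [0,4)
    }

The real implementation continues below, building exactly that single-segment range and querying every register unit of PhysReg: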
+  VNInfo valno(0, Start); +  LiveRange::Segment Seg(Start, End, &valno); +  LiveRange LR; +  LR.addSegment(Seg); + +  // Check for interference with that segment +  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { +    if (query(LR, *Units).checkInterference()) +      return true; +  } +  return false; +} diff --git a/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp index 9f28db6287ba..c22681385492 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -46,7 +46,7 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) {    // Remove defined registers and regmask kills from the set.    for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {      if (O->isReg()) { -      if (!O->isDef()) +      if (!O->isDef() || O->isDebug())          continue;        unsigned Reg = O->getReg();        if (!TargetRegisterInfo::isPhysicalRegister(Reg)) @@ -58,7 +58,7 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) {    // Add uses to the set.    for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { -    if (!O->isReg() || !O->readsReg()) +    if (!O->isReg() || !O->readsReg() || O->isDebug())        continue;      unsigned Reg = O->getReg();      if (!TargetRegisterInfo::isPhysicalRegister(Reg)) diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index 032dd66ae1d2..0b92eab83806 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -34,6 +34,7 @@  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/Passes.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/raw_ostream.h" @@ -498,7 +499,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,  void LiveVariables::runOnInstr(MachineInstr &MI,                                 SmallVectorImpl<unsigned> &Defs) { -  assert(!MI.isDebugValue()); +  assert(!MI.isDebugInstr());    // Process all of the operands of the instruction...    
unsigned NumOperandsToProcess = MI.getNumOperands(); @@ -575,7 +576,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {    DistanceMap.clear();    unsigned Dist = 0;    for (MachineInstr &MI : *MBB) { -    if (MI.isDebugValue()) +    if (MI.isDebugInstr())        continue;      DistanceMap.insert(std::make_pair(&MI, Dist++)); diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index c0da37ede849..f90ce0c8cd2a 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -25,7 +25,6 @@  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/StackProtector.h"  #include "llvm/CodeGen/TargetFrameLowering.h"  #include "llvm/CodeGen/TargetOpcodes.h"  #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -99,7 +98,6 @@ namespace {      void getAnalysisUsage(AnalysisUsage &AU) const override {        AU.setPreservesCFG(); -      AU.addRequired<StackProtector>();        MachineFunctionPass::getAnalysisUsage(AU);      }    }; @@ -109,12 +107,8 @@ namespace {  char LocalStackSlotPass::ID = 0;  char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; - -INITIALIZE_PASS_BEGIN(LocalStackSlotPass, DEBUG_TYPE, -                      "Local Stack Slot Allocation", false, false) -INITIALIZE_PASS_DEPENDENCY(StackProtector) -INITIALIZE_PASS_END(LocalStackSlotPass, DEBUG_TYPE, -                    "Local Stack Slot Allocation", false, false) +INITIALIZE_PASS(LocalStackSlotPass, DEBUG_TYPE, +                "Local Stack Slot Allocation", false, false)  bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {    MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -164,8 +158,8 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI,    Offset = (Offset + Align - 1) / Align * Align;    int64_t LocalOffset = StackGrowsDown ? -Offset : Offset; -  DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset " -        << LocalOffset << "\n"); +  LLVM_DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset " +                    << LocalOffset << "\n");    // Keep the offset available for base register allocation    LocalOffsets[FrameIdx] = LocalOffset;    // And tell MFI about it for PEI to use later @@ -202,7 +196,6 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {      TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;    int64_t Offset = 0;    unsigned MaxAlign = 0; -  StackProtector *SP = &getAnalysis<StackProtector>();    // Make sure that the stack protector comes before the local variables on the    // stack. 
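The next hunk is the substantive part of the LocalStackSlotAllocation change: the SSP layout of each frame object used to be recomputed from the IR-level StackProtector analysis (hence the addRequired<StackProtector> deleted above); it is now recorded on MachineFrameInfo and read back with getObjectSSPLayout. The bucketing it performs — small arrays, address-taken objects, and large arrays each into their own set so they can be placed nearest the stack protector — looks roughly like this (enumerator names mirror the diff; the rest is a sketch):

    #include <cstddef>
    #include <set>
    #include <vector>

    enum class SSPLK { None, LargeArray, SmallArray, AddrOf };

    struct SSPBuckets {
      std::set<int> SmallArrayObjs, AddrOfObjs, LargeArrayObjs;
    };

    SSPBuckets classifyFrameObjects(const std::vector<SSPLK> &Layout) {
      SSPBuckets B;
      for (std::size_t i = 0; i != Layout.size(); ++i) {
        switch (Layout[i]) {
        case SSPLK::None:
          break; // not relevant to stack-protector placement
        case SSPLK::SmallArray:
          B.SmallArrayObjs.insert(static_cast<int>(i));
          break;
        case SSPLK::AddrOf:
          B.AddrOfObjs.insert(static_cast<int>(i));
          break;
        case SSPLK::LargeArray:
          B.LargeArrayObjs.insert(static_cast<int>(i));
          break;
        }
      }
      return B;
    }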
@@ -222,16 +215,16 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {        if (MFI.getStackProtectorIndex() == (int)i)          continue; -      switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) { -      case StackProtector::SSPLK_None: +      switch (MFI.getObjectSSPLayout(i)) { +      case MachineFrameInfo::SSPLK_None:          continue; -      case StackProtector::SSPLK_SmallArray: +      case MachineFrameInfo::SSPLK_SmallArray:          SmallArrayObjs.insert(i);          continue; -      case StackProtector::SSPLK_AddrOf: +      case MachineFrameInfo::SSPLK_AddrOf:          AddrOfObjs.insert(i);          continue; -      case StackProtector::SSPLK_LargeArray: +      case MachineFrameInfo::SSPLK_LargeArray:          LargeArrayObjs.insert(i);          continue;        } @@ -304,7 +297,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {      for (MachineInstr &MI : BB) {        // Debug value, stackmap and patchpoint instructions can't be out of        // range, so they don't need any updates. -      if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STATEPOINT || +      if (MI.isDebugInstr() || MI.getOpcode() == TargetOpcode::STATEPOINT ||            MI.getOpcode() == TargetOpcode::STACKMAP ||            MI.getOpcode() == TargetOpcode::PATCHPOINT)          continue; @@ -335,7 +328,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {    // Sort the frame references by local offset.    // Use frame index as a tie-breaker in case MI's have the same offset. -  std::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); +  llvm::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());    MachineBasicBlock *Entry = &Fn.front(); @@ -351,7 +344,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {      assert(MFI.isObjectPreAllocated(FrameIdx) &&             "Only pre-allocated locals expected!"); -    DEBUG(dbgs() << "Considering: " << MI); +    LLVM_DEBUG(dbgs() << "Considering: " << MI);      unsigned idx = 0;      for (unsigned f = MI.getNumOperands(); idx != f; ++idx) { @@ -367,7 +360,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {      int64_t Offset = 0;      int64_t FrameSizeAdjust = StackGrowsDown ? MFI.getLocalFrameSize() : 0; -    DEBUG(dbgs() << "  Replacing FI in: " << MI); +    LLVM_DEBUG(dbgs() << "  Replacing FI in: " << MI);      // If we have a suitable base register available, use it; otherwise      // create a new one. Note that any offset encoded in the @@ -377,7 +370,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {      if (UsedBaseReg &&          lookupCandidateBaseReg(BaseReg, BaseOffset, FrameSizeAdjust,                                 LocalOffset, MI, TRI)) { -      DEBUG(dbgs() << "  Reusing base register " << BaseReg << "\n"); +      LLVM_DEBUG(dbgs() << "  Reusing base register " << BaseReg << "\n");        // We found a register to reuse.        
Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
     } else {
@@ -405,8 +398,9 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
       const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
       BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
-      DEBUG(dbgs() << "  Materializing base register " << BaseReg <<
-            " at frame local offset " << LocalOffset + InstrOffset << "\n");
+      LLVM_DEBUG(dbgs() << "  Materializing base register " << BaseReg
+                        << " at frame local offset "
+                        << LocalOffset + InstrOffset << "\n");

       // Tell the target to insert the instruction to initialize
       // the base register.
@@ -427,7 +421,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
     // Modify the instruction to use the new base register rather
     // than the frame index operand.
     TRI->resolveFrameIndex(MI, BaseReg, Offset);
-    DEBUG(dbgs() << "Resolved: " << MI);
+    LLVM_DEBUG(dbgs() << "Resolved: " << MI);
     ++NumReplacements;
   }
diff --git a/contrib/llvm/lib/CodeGen/LoopTraversal.cpp b/contrib/llvm/lib/CodeGen/LoopTraversal.cpp
new file mode 100644
index 000000000000..a02d10e09d7d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LoopTraversal.cpp
@@ -0,0 +1,77 @@
+//===- LoopTraversal.cpp - Optimal basic block traversal order --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LoopTraversal.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+using namespace llvm;
+
+bool LoopTraversal::isBlockDone(MachineBasicBlock *MBB) {
+  unsigned MBBNumber = MBB->getNumber();
+  assert(MBBNumber < MBBInfos.size() && "Unexpected basic block number.");
+  return MBBInfos[MBBNumber].PrimaryCompleted &&
+         MBBInfos[MBBNumber].IncomingCompleted ==
+             MBBInfos[MBBNumber].PrimaryIncoming &&
+         MBBInfos[MBBNumber].IncomingProcessed == MBB->pred_size();
+}
+
+LoopTraversal::TraversalOrder LoopTraversal::traverse(MachineFunction &MF) {
+  // Initialize the MBBInfos
+  MBBInfos.assign(MF.getNumBlockIDs(), MBBInfo());
+
+  MachineBasicBlock *Entry = &*MF.begin();
+  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(Entry);
+  SmallVector<MachineBasicBlock *, 4> Workqueue;
+  SmallVector<TraversedMBBInfo, 4> MBBTraversalOrder;
+  for (MachineBasicBlock *MBB : RPOT) {
+    // N.B.: IncomingProcessed and IncomingCompleted were already updated while
+    // processing this block's predecessors.
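isBlockDone above is the whole trick of LoopTraversal: a block's visit is final once its primary pass has happened, every predecessor has been processed at least once, and every edge counted at primary time has since completed. Loop headers therefore get one speculative visit plus one final visit, while straight-line code gets exactly one. A self-contained restatement over a plain adjacency list (RPO and predecessor counts are taken as given; the dead-predecessor finalization pass of the real code is omitted):

    #include <iostream>
    #include <vector>

    struct Info { int PrimaryIncoming = -1, Processed = 0, Completed = 0; };

    int main() {
      // bb0 -> bb1 and bb1 -> bb1 (self back-edge); RPO = {bb0, bb1}.
      std::vector<std::vector<int>> Succs = {{1}, {1}};
      std::vector<int> PredCount = {0, 2};
      std::vector<Info> I(2);
      auto Done = [&](int B) {
        return I[B].PrimaryIncoming >= 0 &&
               I[B].Completed == I[B].PrimaryIncoming &&
               I[B].Processed == PredCount[B];
      };
      for (int B = 0; B != 2; ++B) {
        I[B].PrimaryIncoming = I[B].Processed;
        bool Primary = true;
        std::vector<int> Work{B};
        while (!Work.empty()) {
          int A = Work.back();
          Work.pop_back();
          bool D = Done(A);
          std::cout << "bb" << A << (D ? " (done)\n" : " (speculative)\n");
          for (int S : Succs[A])
            if (!Done(S)) {
              if (Primary)
                ++I[S].Processed;
              if (D)
                ++I[S].Completed;
              if (Done(S))
                Work.push_back(S);
            }
          Primary = false;
        }
      }
      // Output: bb0 (done), bb1 (speculative), bb1 (done).
    }

With that bookkeeping in mind, the body of the RPO loop continues: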
+    unsigned MBBNumber = MBB->getNumber(); +    assert(MBBNumber < MBBInfos.size() && "Unexpected basic block number."); +    MBBInfos[MBBNumber].PrimaryCompleted = true; +    MBBInfos[MBBNumber].PrimaryIncoming = MBBInfos[MBBNumber].IncomingProcessed; +    bool Primary = true; +    Workqueue.push_back(MBB); +    while (!Workqueue.empty()) { +      MachineBasicBlock *ActiveMBB = &*Workqueue.back(); +      Workqueue.pop_back(); +      bool Done = isBlockDone(ActiveMBB); +      MBBTraversalOrder.push_back(TraversedMBBInfo(ActiveMBB, Primary, Done)); +      for (MachineBasicBlock *Succ : ActiveMBB->successors()) { +        unsigned SuccNumber = Succ->getNumber(); +        assert(SuccNumber < MBBInfos.size() && +               "Unexpected basic block number."); +        if (!isBlockDone(Succ)) { +          if (Primary) +            MBBInfos[SuccNumber].IncomingProcessed++; +          if (Done) +            MBBInfos[SuccNumber].IncomingCompleted++; +          if (isBlockDone(Succ)) +            Workqueue.push_back(Succ); +        } +      } +      Primary = false; +    } +  } + +  // We need to go through again and finalize any blocks that are not done yet. +  // This is possible if blocks have dead predecessors, so we didn't visit them +  // above. +  for (MachineBasicBlock *MBB : RPOT) { +    if (!isBlockDone(MBB)) +      MBBTraversalOrder.push_back(TraversedMBBInfo(MBB, false, true)); +    // Don't update successors here. We'll get to them anyway through this +    // loop. +  } + +  MBBInfos.clear(); + +  return MBBTraversalOrder; +} diff --git a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp index 0cf578b50563..36c1d358a9bd 100644 --- a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -68,7 +68,7 @@ bool LowerEmuTLS::runOnModule(Module &M) {      return false;    auto &TM = TPC->getTM<TargetMachine>(); -  if (!TM.Options.EmulatedTLS) +  if (!TM.useEmulatedTLS())      return false;    bool Changed = false; diff --git a/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index 4b676a60a8cd..fa43d13b1b85 100644 --- a/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/contrib/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -43,14 +43,13 @@ extern char &MIRCanonicalizerID;  #define DEBUG_TYPE "mir-canonicalizer"  static cl::opt<unsigned> -CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), -                           cl::value_desc("N"), -                           cl::desc("Function number to canonicalize.")); +    CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), +                               cl::value_desc("N"), +                               cl::desc("Function number to canonicalize.")); -static cl::opt<unsigned> -CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u), -                             cl::value_desc("N"), -                             cl::desc("BasicBlock number to canonicalize.")); +static cl::opt<unsigned> CanonicalizeBasicBlockNumber( +    "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), +    cl::desc("BasicBlock number to canonicalize."));  namespace { @@ -84,9 +83,9 @@ public:      assert(type != RSE_Reg && "Expected a non-register type.");    } -  bool isReg()        const { return type == RSE_Reg;          } -  bool isFrameIndex() const { return type == RSE_FrameIndex;   } -  bool isCandidate()  const { return type == RSE_NewCandidate; } +  
bool isReg() const { return type == RSE_Reg; }
+  bool isFrameIndex() const { return type == RSE_FrameIndex; }
+  bool isCandidate() const { return type == RSE_NewCandidate; }

   VRType getType() const { return type; }
   unsigned getReg() const {
@@ -115,23 +114,49 @@ static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
   return RPOList;
 }

-// Set a dummy vreg. We use this vregs register class to generate throw-away
-// vregs that are used to skip vreg numbers so that vreg numbers line up.
-static unsigned GetDummyVReg(const MachineFunction &MF) {
-  for (auto &MBB : MF) {
-    for (auto &MI : MBB) {
-      for (auto &MO : MI.operands()) {
-        if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
-          continue;
-        return MO.getReg();
-      }
-    }
+static bool
+rescheduleLexographically(std::vector<MachineInstr *> instructions,
+                          MachineBasicBlock *MBB,
+                          std::function<MachineBasicBlock::iterator()> getPos) {
+
+  bool Changed = false;
+  using StringInstrPair = std::pair<std::string, MachineInstr *>;
+  std::vector<StringInstrPair> StringInstrMap;
+
+  for (auto *II : instructions) {
+    std::string S;
+    raw_string_ostream OS(S);
+    II->print(OS);
+    OS.flush();
+
+    // Trim the assignment, or start from the beginning in the case of a store.
+    const size_t i = S.find("=");
+    StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
+  }
+
+  llvm::sort(StringInstrMap.begin(), StringInstrMap.end(),
+            [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
+              return (a.first < b.first);
+            });
+
+  for (auto &II : StringInstrMap) {
+
+    LLVM_DEBUG({
+      dbgs() << "Splicing ";
+      II.second->dump();
+      dbgs() << " right before: ";
+      getPos()->dump();
+    });
+
+    Changed = true;
+    MBB->splice(getPos(), MBB, II.second);
   }
-  return ~0U;
+  return Changed;
 }

-static bool rescheduleCanonically(MachineBasicBlock *MBB) {
+static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
+                                  MachineBasicBlock *MBB) {

   bool Changed = false;
@@ -153,15 +178,62 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) {
     Instructions.push_back(&MI);
   }

+  std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
+  std::vector<MachineInstr *> PseudoIdempotentInstructions;
+  std::vector<unsigned> PhysRegDefs;
+  for (auto *II : Instructions) {
+    for (unsigned i = 1; i < II->getNumOperands(); i++) {
+      MachineOperand &MO = II->getOperand(i);
+      if (!MO.isReg())
+        continue;
+
+      if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+        continue;
+
+      if (!MO.isDef())
+        continue;
+
+      PhysRegDefs.push_back(MO.getReg());
+    }
+  }
+
   for (auto *II : Instructions) {
     if (II->getNumOperands() == 0)
       continue;
+    if (II->mayLoadOrStore())
+      continue;

     MachineOperand &MO = II->getOperand(0);
     if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
       continue;
+    if (!MO.isDef())
+      continue;
+
+    bool IsPseudoIdempotent = true;
+    for (unsigned i = 1; i < II->getNumOperands(); i++) {
+
+      if (II->getOperand(i).isImm()) {
+        continue;
+      }
+
+      if (II->getOperand(i).isReg()) {
+        if (!TargetRegisterInfo::isVirtualRegister(II->getOperand(i).getReg()))
+          if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
             PhysRegDefs.end()) { +            continue; +          } +      } -    DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump();); +      IsPseudoIdempotent = false; +      break; +    } + +    if (IsPseudoIdempotent) { +      PseudoIdempotentInstructions.push_back(II); +      continue; +    } + +    LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););      MachineInstr *Def = II;      unsigned Distance = ~0U; @@ -194,9 +266,6 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) {        if (DefI != BBE && UseI != BBE)          break; -      if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo)) -        continue; -        if (&*BBI == Def) {          DefI = BBI;          continue; @@ -211,17 +280,80 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) {      if (DefI == BBE || UseI == BBE)        continue; -    DEBUG({ +    LLVM_DEBUG({        dbgs() << "Splicing ";        DefI->dump();        dbgs() << " right before: ";        UseI->dump();      }); +    MultiUsers[UseToBringDefCloserTo].push_back(Def);      Changed = true;      MBB->splice(UseI, MBB, DefI);    } +  // Sort the defs for users of multiple defs lexographically. +  for (const auto &E : MultiUsers) { + +    auto UseI = +        std::find_if(MBB->instr_begin(), MBB->instr_end(), +                     [&](MachineInstr &MI) -> bool { return &MI == E.first; }); + +    if (UseI == MBB->instr_end()) +      continue; + +    LLVM_DEBUG( +        dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); +    Changed |= rescheduleLexographically( +        E.second, MBB, [&]() -> MachineBasicBlock::iterator { return UseI; }); +  } + +  PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); +  LLVM_DEBUG( +      dbgs() << "Rescheduling Idempotent Instructions Lexographically.";); +  Changed |= rescheduleLexographically( +      PseudoIdempotentInstructions, MBB, +      [&]() -> MachineBasicBlock::iterator { return MBB->begin(); }); + +  return Changed; +} + +static bool propagateLocalCopies(MachineBasicBlock *MBB) { +  bool Changed = false; +  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + +  std::vector<MachineInstr *> Copies; +  for (MachineInstr &MI : MBB->instrs()) { +    if (MI.isCopy()) +      Copies.push_back(&MI); +  } + +  for (MachineInstr *MI : Copies) { + +    if (!MI->getOperand(0).isReg()) +      continue; +    if (!MI->getOperand(1).isReg()) +      continue; + +    const unsigned Dst = MI->getOperand(0).getReg(); +    const unsigned Src = MI->getOperand(1).getReg(); + +    if (!TargetRegisterInfo::isVirtualRegister(Dst)) +      continue; +    if (!TargetRegisterInfo::isVirtualRegister(Src)) +      continue; +    if (MRI.getRegClass(Dst) != MRI.getRegClass(Src)) +      continue; + +    for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { +      MachineOperand *MO = &*UI; +      MO->setReg(Src); +      Changed = true; +    } + +    MI->eraseFromParent(); +  } +    return Changed;  } @@ -245,7 +377,8 @@ static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {        DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst);        for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { -        if (DoesMISideEffect) break; +        if (DoesMISideEffect) +          break;          DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent());        }      } @@ -253,7 +386,7 @@ static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {      if (!MI->mayStore() 
&& !MI->isBranch() && !DoesMISideEffect)        continue; -    DEBUG(dbgs() << "Found Candidate:  "; MI->dump();); +    LLVM_DEBUG(dbgs() << "Found Candidate:  "; MI->dump(););      Candidates.push_back(MI);    } @@ -274,7 +407,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs,      RegQueue.pop();      if (TReg.isFrameIndex()) { -      DEBUG(dbgs() << "Popping frame index.\n";); +      LLVM_DEBUG(dbgs() << "Popping frame index.\n";);        VRegs.push_back(TypedVReg(RSE_FrameIndex));        continue;      } @@ -283,7 +416,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs,      unsigned Reg = TReg.getReg();      if (TargetRegisterInfo::isVirtualRegister(Reg)) { -      DEBUG({ +      LLVM_DEBUG({          dbgs() << "Popping vreg ";          MRI.def_begin(Reg)->dump();          dbgs() << "\n"; @@ -295,7 +428,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs,          VRegs.push_back(TypedVReg(Reg));        }      } else { -      DEBUG(dbgs() << "Popping physreg.\n";); +      LLVM_DEBUG(dbgs() << "Popping physreg.\n";);        VRegs.push_back(TypedVReg(Reg));        continue;      } @@ -311,7 +444,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs,          break;        } -      DEBUG({ +      LLVM_DEBUG({          dbgs() << "\n========================\n";          dbgs() << "Visited MI: ";          Def->dump(); @@ -323,7 +456,7 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs,          MachineOperand &MO = Def->getOperand(I);          if (MO.isFI()) { -          DEBUG(dbgs() << "Pushing frame index.\n";); +          LLVM_DEBUG(dbgs() << "Pushing frame index.\n";);            RegQueue.push(TypedVReg(RSE_FrameIndex));          } @@ -335,33 +468,56 @@ static void doCandidateWalk(std::vector<TypedVReg> &VRegs,    }  } -// TODO: Work to remove this in the future. One day when we have named vregs -// we should be able to form the canonical name based on some characteristic -// we see in that point of the expression tree (like if we were to name based -// on some sort of value numbering scheme). 
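Both the SkipVRegs helper removed just below and the NamedVRegCursor that replaces it lean on one piece of arithmetic: bump the counter to a round multiple of VR_GAP strictly beyond its current value, so each basic block's renamed vregs start at an easily recognizable boundary (1000, 2000, ...). The old code burned real throw-away vregs to get there; the new cursor just moves an integer. The expression itself, with a few worked values:

    #include <iostream>

    // E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP, as used on both sides of
    // this change: the next multiple of VR_GAP at least one full gap past I.
    unsigned nextGap(unsigned I, unsigned VR_GAP = 1000) {
      return (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
    }

    int main() {
      std::cout << nextGap(0) << '\n';    // 2000
      std::cout << nextGap(999) << '\n';  // 2000
      std::cout << nextGap(2345) << '\n'; // 4000
    }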
-static void SkipVRegs(unsigned &VRegGapIndex, MachineRegisterInfo &MRI,
-                      const TargetRegisterClass *RC) {
-  const unsigned VR_GAP = (++VRegGapIndex * 1000);
-
-  DEBUG({
-    dbgs() << "Adjusting per-BB VR_GAP for BB" << VRegGapIndex << " to "
-           << VR_GAP << "\n";
-  });
+namespace {
+class NamedVRegCursor {
+  MachineRegisterInfo &MRI;
+  unsigned virtualVRegNumber;
+
+public:
+  NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI) {
+    unsigned VRegGapIndex = 0;
+    const unsigned VR_GAP = (++VRegGapIndex * 1000);
+
+    unsigned I = MRI.createIncompleteVirtualRegister();
+    const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;

-  unsigned I = MRI.createVirtualRegister(RC);
-  const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
-  while (I != E) {
-    I = MRI.createVirtualRegister(RC);
+    virtualVRegNumber = E;
   }
-}
+
+  void SkipVRegs() {
+    unsigned VRegGapIndex = 1;
+    const unsigned VR_GAP = (++VRegGapIndex * 1000);
+
+    unsigned I = virtualVRegNumber;
+    const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
+
+    virtualVRegNumber = E;
+  }
+
+  unsigned getVirtualVReg() const { return virtualVRegNumber; }
+
+  unsigned incrementVirtualVReg(unsigned incr = 1) {
+    virtualVRegNumber += incr;
+    return virtualVRegNumber;
+  }
+
+  unsigned createVirtualRegister(const TargetRegisterClass *RC) {
+    std::string S;
+    raw_string_ostream OS(S);
+    OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
+    OS.flush();
+    virtualVRegNumber++;
+
+    return MRI.createVirtualRegister(RC, OS.str());
+  }
+};
+} // namespace

 static std::map<unsigned, unsigned>
 GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
                  const std::vector<unsigned> &renamedInOtherBB,
-                 MachineRegisterInfo &MRI,
-                 const TargetRegisterClass *RC) {
+                 MachineRegisterInfo &MRI, NamedVRegCursor &NVC) {
   std::map<unsigned, unsigned> VRegRenameMap;
-  unsigned LastRenameReg = MRI.createVirtualRegister(RC);
   bool FirstCandidate = true;

   for (auto &vreg : VRegs) {
@@ -370,8 +526,9 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
       // (especially when comparing SelectionDAG to GlobalISel generated MIR)
       // that in the other file we are just getting an incoming vreg that comes
       // from a copy from a frame index. So it's safe to skip by one.
-      LastRenameReg = MRI.createVirtualRegister(RC);
-      DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
+      unsigned LastRenameReg = NVC.incrementVirtualVReg();
+      (void)LastRenameReg;
+      LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
       continue;

     } else if (vreg.isCandidate()) {
@@ -380,20 +537,15 @@
       // same vreg number making it more likely that the canonical walk from the
       // candidate instruction. We don't need to skip from the first candidate of
       // the BasicBlock because we already skip ahead several vregs for each BB.
-      while (LastRenameReg % 10) { -        if (!FirstCandidate) break; -        LastRenameReg = MRI.createVirtualRegister(RC); - -        DEBUG({ -          dbgs() << "Skipping rename for new candidate " << LastRenameReg -                 << "\n"; -        }); -      } +      unsigned LastRenameReg = NVC.getVirtualVReg(); +      if (FirstCandidate) +        NVC.incrementVirtualVReg(LastRenameReg % 10);        FirstCandidate = false;        continue;      } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) { -      LastRenameReg = MRI.createVirtualRegister(RC); -      DEBUG({ +      unsigned LastRenameReg = NVC.incrementVirtualVReg(); +      (void)LastRenameReg; +      LLVM_DEBUG({          dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n";        });        continue; @@ -401,27 +553,27 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,      auto Reg = vreg.getReg();      if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { -      DEBUG(dbgs() << "Vreg " << Reg << " already renamed in other BB.\n";); +      LLVM_DEBUG(dbgs() << "Vreg " << Reg +                        << " already renamed in other BB.\n";);        continue;      } -    auto Rename = MRI.createVirtualRegister(MRI.getRegClass(Reg)); -    LastRenameReg = Rename; +    auto Rename = NVC.createVirtualRegister(MRI.getRegClass(Reg));      if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { -      DEBUG(dbgs() << "Mapping vreg ";); +      LLVM_DEBUG(dbgs() << "Mapping vreg ";);        if (MRI.reg_begin(Reg) != MRI.reg_end()) { -        DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); +        LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););        } else { -        DEBUG(dbgs() << Reg;); +        LLVM_DEBUG(dbgs() << Reg;);        } -      DEBUG(dbgs() << " to ";); +      LLVM_DEBUG(dbgs() << " to ";);        if (MRI.reg_begin(Rename) != MRI.reg_end()) { -        DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); +        LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););        } else { -        DEBUG(dbgs() << Rename;); +        LLVM_DEBUG(dbgs() << Rename;);        } -      DEBUG(dbgs() << "\n";); +      LLVM_DEBUG(dbgs() << "\n";);        VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));      } @@ -483,23 +635,25 @@ static bool doDefKillClear(MachineBasicBlock *MBB) {  static bool runOnBasicBlock(MachineBasicBlock *MBB,                              std::vector<StringRef> &bbNames,                              std::vector<unsigned> &renamedInOtherBB, -                            unsigned &basicBlockNum, unsigned &VRegGapIndex) { +                            unsigned &basicBlockNum, unsigned &VRegGapIndex, +                            NamedVRegCursor &NVC) {    if (CanonicalizeBasicBlockNumber != ~0U) {      if (CanonicalizeBasicBlockNumber != basicBlockNum++)        return false; -    DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() << "\n";); +    LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() +                      << "\n";);    }    if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { -    DEBUG({ +    LLVM_DEBUG({        dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()               << "\n";      });      return false;    } -  DEBUG({ +  LLVM_DEBUG({      dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n";      dbgs() << "\n\n================================================\n\n";    }); @@ -508,17 +662,18 @@ static bool 
runOnBasicBlock(MachineBasicBlock *MBB,
   MachineFunction &MF = *MBB->getParent();
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  const unsigned DummyVReg = GetDummyVReg(MF);
-  const TargetRegisterClass *DummyRC =
-    (DummyVReg == ~0U) ? nullptr : MRI.getRegClass(DummyVReg);
-  if (!DummyRC) return false;
-
   bbNames.push_back(MBB->getName());
-  DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
+  LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
 
-  DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
-  Changed |= rescheduleCanonically(MBB);
-  DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
+  LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
+             MBB->dump(););
+  Changed |= propagateLocalCopies(MBB);
+  LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
+
+  LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
+  unsigned IdempotentInstCount = 0;
+  Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
+  LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
 
   std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
   std::vector<MachineInstr *> VisitedMIs;
@@ -543,7 +698,7 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
       if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
         continue;
 
-      DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
+      LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
       RegQueue.push(TypedVReg(MO.getReg()));
     }
 
@@ -560,10 +715,10 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
       if (!MO.isReg() && !MO.isFI())
         continue;
 
-      DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
+      LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
 
-      RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) :
-                                  TypedVReg(RSE_FrameIndex));
+      RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg())
+                               : TypedVReg(RSE_FrameIndex));
     }
 
     doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB);
@@ -574,15 +729,38 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
   if (VRegs.size() == 0)
     return Changed;
 
-  // Skip some vregs, so we can recon where we'll land next.
-  SkipVRegs(VRegGapIndex, MRI, DummyRC);
-
-  auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC);
+  auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC);
   Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
+
+  // Here we renumber the def vregs for the idempotent instructions from the top
+  // of the MachineBasicBlock so that they are named in the order that we sorted
+  // them alphabetically. Eventually we won't need SkipVRegs because we will use
+  // named vregs instead.
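The renumbering loop that opens the next chunk first copies the operands of each def vreg into RenameMOs and only then calls setReg on them. A standalone illustration of that collect-then-rewrite shape, using toy structs rather than MachineOperand/MachineRegisterInfo (the split is presumably there so the register use-list is not mutated while it is being iterated):

  #include <cstdio>
  #include <vector>

  struct Operand { unsigned Reg; }; // toy stand-in for MachineOperand

  int main() {
    std::vector<Operand> Ops = {{5}, {5}, {9}, {5}};
    const unsigned Old = 5, New = 100;

    std::vector<Operand *> ToRewrite; // phase 1: snapshot the matching operands
    for (Operand &O : Ops)
      if (O.Reg == Old)
        ToRewrite.push_back(&O);

    for (Operand *O : ToRewrite)     // phase 2: rewrite, with no live iterator
      O->Reg = New;

    for (const Operand &O : Ops)
      std::printf("%u ", O.Reg);     // prints: 100 100 9 100
    std::printf("\n");
    return 0;
  }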
+  NVC.SkipVRegs();
+
+  auto MII = MBB->begin();
+  for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
+    MachineInstr &MI = *MII++;
+    Changed = true;
+    unsigned vRegToRename = MI.getOperand(0).getReg();
+    auto Rename = NVC.createVirtualRegister(MRI.getRegClass(vRegToRename));
+
+    std::vector<MachineOperand *> RenameMOs;
+    for (auto &MO : MRI.reg_operands(vRegToRename)) {
+      RenameMOs.push_back(&MO);
+    }
+
+    for (auto *MO : RenameMOs) {
+      MO->setReg(Rename);
+    }
+  }
+
   Changed |= doDefKillClear(MBB);
 
-  DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";);
-  DEBUG(dbgs() << "\n\n================================================\n\n");
+  LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
+             dbgs() << "\n";);
+  LLVM_DEBUG(
+      dbgs() << "\n\n================================================\n\n");
   return Changed;
 }
 
@@ -592,22 +770,21 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
   if (CanonicalizeFunctionNumber != ~0U) {
     if (CanonicalizeFunctionNumber != functionNum++)
       return false;
-    DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName() << "\n";);
+    LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
+                      << "\n";);
   }
 
   // We need a valid vreg to create a vreg type for skipping all those
   // stray vreg numbers to reach alignment/canonical vreg values.
-  std::vector<MachineBasicBlock*> RPOList = GetRPOList(MF);
+  std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
 
-  DEBUG(
-    dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
-    dbgs() << "\n\n================================================\n\n";
-    dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
-    for (auto MBB : RPOList) {
-      dbgs() << MBB->getName() << "\n";
-    }
-    dbgs() << "\n\n================================================\n\n";
-  );
+  LLVM_DEBUG(
+      dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
+      dbgs() << "\n\n================================================\n\n";
+      dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
+      for (auto MBB
+           : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
+      << "\n\n================================================\n\n";);
 
   std::vector<StringRef> BBNames;
   std::vector<unsigned> RenamedInOtherBB;
@@ -617,9 +794,11 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
 
   bool Changed = false;
 
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  NamedVRegCursor NVC(MRI);
   for (auto MBB : RPOList)
-    Changed |= runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx);
+    Changed |=
+        runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx, NVC);
 
   return Changed;
 }
-
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 6adb7f1288d7..da05c9a22785 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -179,23 +179,6 @@ static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type,
   return C;
 }
 
-static Cursor maybeLexIntegerOrScalarType(Cursor C, MIToken &Token) {
-  if ((C.peek() != 'i' && C.peek() != 's' && C.peek() != 'p') ||
-      !isdigit(C.peek(1)))
-    return None;
-  char Kind = C.peek();
-  auto Range = C;
-  C.advance(); // Skip 'i', 's', or 'p'
-  while (isdigit(C.peek()))
-    
C.advance(); - -  Token.reset(Kind == 'i' -                  ? MIToken::IntegerType -                  : (Kind == 's' ? MIToken::ScalarType : MIToken::PointerType), -              Range.upto(C)); -  return C; -} -  static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {    return StringSwitch<MIToken::TokenKind>(Identifier)        .Case("_", MIToken::underscore) @@ -211,6 +194,14 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {        .Case("renamable", MIToken::kw_renamable)        .Case("tied-def", MIToken::kw_tied_def)        .Case("frame-setup", MIToken::kw_frame_setup) +      .Case("frame-destroy", MIToken::kw_frame_destroy) +      .Case("nnan", MIToken::kw_nnan) +      .Case("ninf", MIToken::kw_ninf) +      .Case("nsz", MIToken::kw_nsz) +      .Case("arcp", MIToken::kw_arcp) +      .Case("contract", MIToken::kw_contract) +      .Case("afn", MIToken::kw_afn) +      .Case("reassoc", MIToken::kw_reassoc)        .Case("debug-location", MIToken::kw_debug_location)        .Case("same_value", MIToken::kw_cfi_same_value)        .Case("offset", MIToken::kw_cfi_offset) @@ -241,6 +232,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {        .Case("dereferenceable", MIToken::kw_dereferenceable)        .Case("invariant", MIToken::kw_invariant)        .Case("align", MIToken::kw_align) +      .Case("addrspace", MIToken::kw_addrspace)        .Case("stack", MIToken::kw_stack)        .Case("got", MIToken::kw_got)        .Case("jump-table", MIToken::kw_jump_table) @@ -408,17 +400,38 @@ static bool isRegisterChar(char C) {    return isIdentifierChar(C) && C != '.';  } -static Cursor maybeLexRegister(Cursor C, MIToken &Token) { -  if (C.peek() != '%') +static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) { +  Cursor Range = C; +  C.advance(); // Skip '%' +  while (isRegisterChar(C.peek())) +    C.advance(); +  Token.reset(MIToken::NamedVirtualRegister, Range.upto(C)) +      .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' +  return C; +} + +static Cursor maybeLexRegister(Cursor C, MIToken &Token, +                               ErrorCallbackType ErrorCallback) { +  if (C.peek() != '%' && C.peek() != '$') +    return None; + +  if (C.peek() == '%') { +    if (isdigit(C.peek(1))) +      return lexVirtualRegister(C, Token); + +    if (isRegisterChar(C.peek(1))) +      return lexNamedVirtualRegister(C, Token); +      return None; -  if (isdigit(C.peek(1))) -    return lexVirtualRegister(C, Token); +  } + +  assert(C.peek() == '$');    auto Range = C; -  C.advance(); // Skip '%' +  C.advance(); // Skip '$'    while (isRegisterChar(C.peek()))      C.advance();    Token.reset(MIToken::NamedRegister, Range.upto(C)) -      .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' +      .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$'    return C;  } @@ -441,7 +454,7 @@ static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,  static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,                                       ErrorCallbackType ErrorCallback) { -  if (C.peek() != '$') +  if (C.peek() != '&')      return None;    return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,                   ErrorCallback); @@ -620,8 +633,6 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,      return C.remaining();    } -  if (Cursor R = maybeLexIntegerOrScalarType(C, Token)) -    return R.remaining();    if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))      return 
R.remaining();    if (Cursor R = maybeLexIdentifier(C, Token)) @@ -640,7 +651,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,      return R.remaining();    if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback))      return R.remaining(); -  if (Cursor R = maybeLexRegister(C, Token)) +  if (Cursor R = maybeLexRegister(C, Token, ErrorCallback))      return R.remaining();    if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback))      return R.remaining(); diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h index 0204d549d5d4..e21c71532f79 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -63,6 +63,14 @@ struct MIToken {      kw_renamable,      kw_tied_def,      kw_frame_setup, +    kw_frame_destroy, +    kw_nnan, +    kw_ninf, +    kw_nsz, +    kw_arcp, +    kw_contract, +    kw_afn, +    kw_reassoc,      kw_debug_location,      kw_cfi_same_value,      kw_cfi_offset, @@ -92,6 +100,7 @@ struct MIToken {      kw_non_temporal,      kw_invariant,      kw_align, +    kw_addrspace,      kw_stack,      kw_got,      kw_jump_table, @@ -114,12 +123,10 @@ struct MIToken {      // Identifier tokens      Identifier, -    IntegerType,      NamedRegister, +    NamedVirtualRegister,      MachineBasicBlockLabel,      MachineBasicBlock, -    PointerType, -    ScalarType,      StackObject,      FixedStackObject,      NamedGlobalValue, @@ -168,7 +175,7 @@ public:    bool isRegister() const {      return Kind == NamedRegister || Kind == underscore || -           Kind == VirtualRegister; +           Kind == NamedVirtualRegister || Kind == VirtualRegister;    }    bool isRegisterFlag() const { diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 1a78ae3aad07..a61e7872f1ae 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -98,6 +98,18 @@ VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) {    return *I.first->second;  } +VRegInfo &PerFunctionMIParsingState::getVRegInfoNamed(StringRef RegName) { +  assert(RegName != "" && "Expected named reg."); + +  auto I = VRegInfosNamed.insert(std::make_pair(RegName.str(), nullptr)); +  if (I.second) { +    VRegInfo *Info = new (Allocator) VRegInfo; +    Info->VReg = MF.getRegInfo().createIncompleteVirtualRegister(RegName); +    I.first->second = Info; +  } +  return *I.first->second; +} +  namespace {  /// A wrapper struct around the 'MachineOperand' struct that includes a source @@ -182,6 +194,7 @@ public:    bool parseNamedRegister(unsigned &Reg);    bool parseVirtualRegister(VRegInfo *&Info); +  bool parseNamedVirtualRegister(VRegInfo *&Info);    bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo);    bool parseRegisterFlag(unsigned &Flags);    bool parseRegisterClassOrBank(VRegInfo &RegInfo); @@ -190,7 +203,7 @@ public:    bool parseRegisterOperand(MachineOperand &Dest,                              Optional<unsigned> &TiedDefIdx, bool IsDef = false);    bool parseImmediateOperand(MachineOperand &Dest); -  bool parseIRConstant(StringRef::iterator Loc, StringRef Source, +  bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue,                         const Constant *&C);    bool parseIRConstant(StringRef::iterator Loc, const Constant *&C);    bool parseLowLevelType(StringRef::iterator Loc, LLT &Ty); @@ -209,7 +222,7 @@ public:    bool parseJumpTableIndexOperand(MachineOperand 
&Dest);    bool parseExternalSymbolOperand(MachineOperand &Dest);    bool parseMDNode(MDNode *&Node); -  bool parseDIExpression(MDNode *&Node); +  bool parseDIExpression(MDNode *&Expr);    bool parseMetadataOperand(MachineOperand &Dest);    bool parseCFIOffset(int &Offset);    bool parseCFIRegister(unsigned &Reg); @@ -228,6 +241,7 @@ public:                                           Optional<unsigned> &TiedDefIdx);    bool parseOffset(int64_t &Offset);    bool parseAlignment(unsigned &Alignment); +  bool parseAddrspace(unsigned &Addrspace);    bool parseOperandsOffset(MachineOperand &Op);    bool parseIRValue(const Value *&V);    bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags); @@ -915,15 +929,43 @@ bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,        continue;      return error(Operands.empty() ? Token.location() : Operands.back().End,                   Twine("missing implicit register operand '") + -                     printImplicitRegisterFlag(I) + " %" + +                     printImplicitRegisterFlag(I) + " $" +                       getRegisterName(TRI, I.getReg()) + "'");    }    return false;  }  bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { -  if (Token.is(MIToken::kw_frame_setup)) { -    Flags |= MachineInstr::FrameSetup; +  // Allow frame and fast math flags for OPCODE +  while (Token.is(MIToken::kw_frame_setup) || +         Token.is(MIToken::kw_frame_destroy) || +         Token.is(MIToken::kw_nnan) || +         Token.is(MIToken::kw_ninf) || +         Token.is(MIToken::kw_nsz) || +         Token.is(MIToken::kw_arcp) || +         Token.is(MIToken::kw_contract) || +         Token.is(MIToken::kw_afn) || +         Token.is(MIToken::kw_reassoc)) { +    // Mine frame and fast math flags +    if (Token.is(MIToken::kw_frame_setup)) +      Flags |= MachineInstr::FrameSetup; +    if (Token.is(MIToken::kw_frame_destroy)) +      Flags |= MachineInstr::FrameDestroy; +    if (Token.is(MIToken::kw_nnan)) +      Flags |= MachineInstr::FmNoNans; +    if (Token.is(MIToken::kw_ninf)) +      Flags |= MachineInstr::FmNoInfs; +    if (Token.is(MIToken::kw_nsz)) +      Flags |= MachineInstr::FmNsz; +    if (Token.is(MIToken::kw_arcp)) +      Flags |= MachineInstr::FmArcp; +    if (Token.is(MIToken::kw_contract)) +      Flags |= MachineInstr::FmContract; +    if (Token.is(MIToken::kw_afn)) +      Flags |= MachineInstr::FmAfn; +    if (Token.is(MIToken::kw_reassoc)) +      Flags |= MachineInstr::FmReassoc; +      lex();    }    if (Token.isNot(MIToken::Identifier)) @@ -943,7 +985,18 @@ bool MIParser::parseNamedRegister(unsigned &Reg) {    return false;  } +bool MIParser::parseNamedVirtualRegister(VRegInfo *&Info) { +  assert(Token.is(MIToken::NamedVirtualRegister) && "Expected NamedVReg token"); +  StringRef Name = Token.stringValue(); +  // TODO: Check that the VReg name is not the same as a physical register name. +  //       If it is, then print a warning (when warnings are implemented). 
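Taken together with the MILexer.cpp changes earlier in this diff, the operand prefixes now split three ways: '%' introduces virtual registers (numbered or, newly, named), '$' introduces physical registers (previously '%'), and '&' introduces external symbols (previously '$'). A toy classifier sketching that dispatch; the real lexer also tracks cursors, token kinds, and error callbacks, and names like %myvreg, $eax, and &memcpy are made-up, target-specific examples:

  #include <cctype>
  #include <cstdio>
  #include <string>

  static const char *classify(const std::string &Tok) {
    if (Tok.size() >= 2 && Tok[0] == '%')
      return std::isdigit((unsigned char)Tok[1]) ? "numbered virtual register"
                                                 : "named virtual register";
    if (!Tok.empty() && Tok[0] == '$')
      return "physical register";
    if (!Tok.empty() && Tok[0] == '&')
      return "external symbol";
    return "something else";
  }

  int main() {
    for (const char *T : {"%0", "%myvreg", "$eax", "&memcpy"})
      std::printf("%-8s -> %s\n", T, classify(T));
    return 0;
  }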
+  Info = &PFS.getVRegInfoNamed(Name); +  return false; +} +  bool MIParser::parseVirtualRegister(VRegInfo *&Info) { +  if (Token.is(MIToken::NamedVirtualRegister)) +    return parseNamedVirtualRegister(Info);    assert(Token.is(MIToken::VirtualRegister) && "Needs VirtualRegister token");    unsigned ID;    if (getUnsigned(ID)) @@ -959,6 +1012,7 @@ bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) {      return false;    case MIToken::NamedRegister:      return parseNamedRegister(Reg); +  case MIToken::NamedVirtualRegister:    case MIToken::VirtualRegister:      if (parseVirtualRegister(Info))        return true; @@ -1249,11 +1303,17 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {  }  bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { -  if (Token.is(MIToken::ScalarType)) { +  if (Token.range().front() == 's' || Token.range().front() == 'p') { +    StringRef SizeStr = Token.range().drop_front(); +    if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit)) +      return error("expected integers after 's'/'p' type character"); +  } + +  if (Token.range().front() == 's') {      Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue());      lex();      return false; -  } else if (Token.is(MIToken::PointerType)) { +  } else if (Token.range().front() == 'p') {      const DataLayout &DL = MF.getDataLayout();      unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();      Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); @@ -1264,38 +1324,60 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {    // Now we're looking for a vector.    if (Token.isNot(MIToken::less))      return error(Loc, -                 "expected unsized, pN, sN or <N x sM> for GlobalISel type"); - +                 "expected sN, pA, <M x sN>, or <M x pA> for GlobalISel type");    lex();    if (Token.isNot(MIToken::IntegerLiteral)) -    return error(Loc, "expected <N x sM> for vctor type"); +    return error(Loc, "expected <M x sN> or <M x pA> for vector type");    uint64_t NumElements = Token.integerValue().getZExtValue();    lex();    if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x") -    return error(Loc, "expected '<N x sM>' for vector type"); +    return error(Loc, "expected <M x sN> or <M x pA> for vector type");    lex(); -  if (Token.isNot(MIToken::ScalarType)) -    return error(Loc, "expected '<N x sM>' for vector type"); -  uint64_t ScalarSize = APSInt(Token.range().drop_front()).getZExtValue(); +  if (Token.range().front() != 's' && Token.range().front() != 'p') +    return error(Loc, "expected <M x sN> or <M x pA> for vector type"); +  StringRef SizeStr = Token.range().drop_front(); +  if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit)) +    return error("expected integers after 's'/'p' type character"); + +  if (Token.range().front() == 's') +    Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue()); +  else if (Token.range().front() == 'p') { +    const DataLayout &DL = MF.getDataLayout(); +    unsigned AS = APSInt(Token.range().drop_front()).getZExtValue(); +    Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); +  } else +    return error(Loc, "expected <M x sN> or <M x pA> for vector type");    lex();    if (Token.isNot(MIToken::greater)) -    return error(Loc, "expected '<N x sM>' for vector type"); +    return error(Loc, "expected <M x sN> or <M x pA> for vector type");    lex(); -  Ty = LLT::vector(NumElements, ScalarSize); +  Ty = LLT::vector(NumElements, Ty);  
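With the ScalarType/PointerType tokens gone, parseLowLevelType above re-derives the type from the raw token text, accepting sN, pA, <M x sN>, and <M x pA>. The digit-suffix check it applies can be sketched standalone (simplified to whole-token strings; the real code works over a cursor and also validates the surrounding vector syntax):

  #include <cctype>
  #include <cstdio>
  #include <string>

  // A scalar/pointer type token is 's' or 'p' followed only by digits.
  static bool isScalarOrPointerTok(const std::string &T) {
    if (T.size() < 2 || (T[0] != 's' && T[0] != 'p'))
      return false;
    for (std::size_t i = 1; i < T.size(); ++i)
      if (!std::isdigit((unsigned char)T[i]))
        return false;
    return true;
  }

  int main() {
    for (const char *T : {"s32", "p0", "s", "sfoo", "p2x"})
      std::printf("%-5s -> %s\n", T, isScalarOrPointerTok(T) ? "ok" : "error");
    return 0;
  }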
  return false;  }  bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) { -  assert(Token.is(MIToken::IntegerType)); +  assert(Token.is(MIToken::Identifier)); +  StringRef TypeStr = Token.range(); +  if (TypeStr.front() != 'i' && TypeStr.front() != 's' && +      TypeStr.front() != 'p') +    return error( +        "a typed immediate operand should start with one of 'i', 's', or 'p'"); +  StringRef SizeStr = Token.range().drop_front(); +  if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit)) +    return error("expected integers after 'i'/'s'/'p' type character"); +    auto Loc = Token.location();    lex(); -  if (Token.isNot(MIToken::IntegerLiteral)) -    return error("expected an integer literal"); +  if (Token.isNot(MIToken::IntegerLiteral)) { +    if (Token.isNot(MIToken::Identifier) || +        !(Token.range() == "true" || Token.range() == "false")) +      return error("expected an integer literal"); +  }    const Constant *C = nullptr;    if (parseIRConstant(Loc, C))      return true; @@ -1876,13 +1958,11 @@ bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {  bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) {    assert(Token.stringValue() == "CustomRegMask" && "Expected a custom RegMask"); -  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); -  assert(TRI && "Expected target register info");    lex();    if (expectAndConsume(MIToken::lparen))      return true; -  uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs()); +  uint32_t *Mask = MF.allocateRegMask();    while (true) {      if (Token.isNot(MIToken::NamedRegister))        return error("expected a named register"); @@ -1905,9 +1985,7 @@ bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) {  bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {    assert(Token.is(MIToken::kw_liveout)); -  const auto *TRI = MF.getSubtarget().getRegisterInfo(); -  assert(TRI && "Expected target register info"); -  uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs()); +  uint32_t *Mask = MF.allocateRegMask();    lex();    if (expectAndConsume(MIToken::lparen))      return true; @@ -1946,11 +2024,10 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,    case MIToken::underscore:    case MIToken::NamedRegister:    case MIToken::VirtualRegister: +  case MIToken::NamedVirtualRegister:      return parseRegisterOperand(Dest, TiedDefIdx);    case MIToken::IntegerLiteral:      return parseImmediateOperand(Dest); -  case MIToken::IntegerType: -    return parseTypedImmediateOperand(Dest);    case MIToken::kw_half:    case MIToken::kw_float:    case MIToken::kw_double: @@ -2011,8 +2088,10 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,        Dest = MachineOperand::CreateRegMask(RegMask);        lex();        break; -    } else +    } else if (Token.stringValue() == "CustomRegMask") {        return parseCustomRegisterMaskOperand(Dest); +    } else +      return parseTypedImmediateOperand(Dest);    default:      // FIXME: Parse the MCSymbol machine operand.      
return error("expected a machine operand"); @@ -2091,6 +2170,17 @@ bool MIParser::parseAlignment(unsigned &Alignment) {    return false;  } +bool MIParser::parseAddrspace(unsigned &Addrspace) { +  assert(Token.is(MIToken::kw_addrspace)); +  lex(); +  if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) +    return error("expected an integer literal after 'addrspace'"); +  if (getUnsigned(Addrspace)) +    return true; +  lex(); +  return false; +} +  bool MIParser::parseOperandsOffset(MachineOperand &Op) {    int64_t Offset = 0;    if (parseOffset(Offset)) @@ -2402,6 +2492,10 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {        if (parseAlignment(BaseAlignment))          return true;        break; +    case MIToken::kw_addrspace: +      if (parseAddrspace(Ptr.AddrSpace)) +        return true; +      break;      case MIToken::md_tbaa:        lex();        if (parseMDNode(AAInfo.TBAA)) diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h index 2307881068ef..b06ceb21b740 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h @@ -56,6 +56,7 @@ struct PerFunctionMIParsingState {    DenseMap<unsigned, MachineBasicBlock *> MBBSlots;    DenseMap<unsigned, VRegInfo*> VRegInfos; +  StringMap<VRegInfo*> VRegInfosNamed;    DenseMap<unsigned, int> FixedStackObjectSlots;    DenseMap<unsigned, int> StackObjectSlots;    DenseMap<unsigned, unsigned> ConstantPoolSlots; @@ -66,7 +67,8 @@ struct PerFunctionMIParsingState {                              const Name2RegClassMap &Names2RegClasses,                              const Name2RegBankMap &Names2RegBanks); -  VRegInfo &getVRegInfo(unsigned VReg); +  VRegInfo &getVRegInfo(unsigned Num); +  VRegInfo &getVRegInfoNamed(StringRef RegName);  };  /// Parse the machine basic block definitions, and skip the machine diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 7d8e62736a34..3d2db97acb48 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -122,8 +122,9 @@ public:                                  const yaml::StringValue &RegisterSource,                                  bool IsRestored, int FrameIdx); +  template <typename T>    bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, -                                  const yaml::MachineStackObject &Object, +                                  const T &Object,                                    int FrameIdx);    bool initializeConstantPool(PerFunctionMIParsingState &PFS, @@ -237,7 +238,7 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() {            dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) {      SMDiagnostic Error;      M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error, -                      Context, &IRSlots); +                      Context, &IRSlots, /*UpgradeDebugInfo=*/false);      if (!M) {        reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange()));        return nullptr; @@ -362,6 +363,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,          MachineFunctionProperties::Property::RegBankSelected);    if (YamlMF.Selected)      MF.getProperties().set(MachineFunctionProperties::Property::Selected); +  if (YamlMF.FailedISel) +    MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);    
PerFunctionMIParsingState PFS(MF, SM, IRSlots, Names2RegClasses,                                  Names2RegBanks); @@ -417,6 +420,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,    computeFunctionProperties(MF); +  MF.getSubtarget().mirFileLoaded(MF); +    MF.verify();    return false;  } @@ -508,13 +513,12 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,    MachineRegisterInfo &MRI = MF.getRegInfo();    bool Error = false;    // Create VRegs -  for (auto P : PFS.VRegInfos) { -    const VRegInfo &Info = *P.second; +  auto populateVRegInfo = [&] (const VRegInfo &Info, Twine Name) {      unsigned Reg = Info.VReg;      switch (Info.Kind) {      case VRegInfo::UNKNOWN:        error(Twine("Cannot determine class/bank of virtual register ") + -            Twine(P.first) + " in function '" + MF.getName() + "'"); +            Name + " in function '" + MF.getName() + "'");        Error = true;        break;      case VRegInfo::NORMAL: @@ -528,6 +532,17 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,        MRI.setRegBank(Reg, *Info.D.RegBank);        break;      } +  }; + +  for (auto I = PFS.VRegInfosNamed.begin(), E = PFS.VRegInfosNamed.end(); +       I != E; I++) { +    const VRegInfo &Info = *I->second; +    populateVRegInfo(Info, Twine(I->first())); +  } + +  for (auto P : PFS.VRegInfos) { +    const VRegInfo &Info = *P.second; +    populateVRegInfo(Info, Twine(P.first));    }    // Compute MachineRegisterInfo::UsedPhysRegMask @@ -568,6 +583,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,    MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);    MFI.setHasVAStart(YamlMFI.HasVAStart);    MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); +  MFI.setLocalFrameSize(YamlMFI.LocalFrameSize);    if (!YamlMFI.SavePoint.Value.empty()) {      MachineBasicBlock *MBB = nullptr;      if (parseMBBReference(PFS, MBB, YamlMFI.SavePoint)) @@ -601,6 +617,8 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,      if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister,                                   Object.CalleeSavedRestored, ObjectIdx))        return true; +    if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx)) +      return true;    }    // Initialize the ordinary frame objects. @@ -685,11 +703,11 @@ static bool typecheckMDNode(T *&Result, MDNode *Node,    return false;  } +template <typename T>  bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, -    const yaml::MachineStackObject &Object, int FrameIdx) { +    const T &Object, int FrameIdx) {    // Debug information can only be attached to stack objects; Fixed stack    // objects aren't supported. 
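Note that the context comment just above is now stale: with the FrameIdx assert removed below and parseStackObjectsDebugInfo templated over the YAML object type, debug info is parsed for fixed stack objects as well (see the call added in the fixed-object loop earlier in this hunk sequence). The templating idea in miniature, with toy types loosely modeled on the YAML objects and hypothetical metadata strings:

  #include <cstdio>
  #include <string>

  struct StackObj      { std::string DebugVar; };
  struct FixedStackObj { std::string DebugVar; };

  // One implementation shared by two structurally similar object types,
  // as parseStackObjectsDebugInfo now does.
  template <typename T> static void describe(const T &Obj) {
    std::printf("debug var: %s\n", Obj.DebugVar.c_str());
  }

  int main() {
    describe(StackObj{"!di.var.a"});      // hypothetical metadata strings
    describe(FixedStackObj{"!di.var.b"});
    return 0;
  }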
-  assert(FrameIdx >= 0 && "Expected a stack object frame index");    MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr;    if (parseMDNode(PFS, Var, Object.DebugVar) ||        parseMDNode(PFS, Expr, Object.DebugExpr) || @@ -704,7 +722,7 @@ bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,        typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) ||        typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this))      return true; -  PFS.MF.setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc); +  PFS.MF.setVariableDbgInfo(DIVar, DIExpr, FrameIdx, DILoc);    return false;  } diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp index f91cca6e4e50..bf8cd1489ec5 100644 --- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp @@ -19,7 +19,6 @@  #include "llvm/ADT/SmallBitVector.h"  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h"  #include "llvm/ADT/StringRef.h"  #include "llvm/ADT/Twine.h"  #include "llvm/CodeGen/GlobalISel/RegisterBank.h" @@ -157,14 +156,10 @@ public:    void print(const MachineBasicBlock &MBB);    void print(const MachineInstr &MI); -  void printIRValueReference(const Value &V);    void printStackObjectReference(int FrameIndex);    void print(const MachineInstr &MI, unsigned OpIdx,               const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies,               LLT TypeToPrint, bool PrintDef = true); -  void print(const LLVMContext &Context, const TargetInstrInfo &TII, -             const MachineMemOperand &Op); -  void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID);  };  } // end namespace llvm @@ -207,6 +202,8 @@ void MIRPrinter::print(const MachineFunction &MF) {        MachineFunctionProperties::Property::RegBankSelected);    YamlMF.Selected = MF.getProperties().hasProperty(        MachineFunctionProperties::Property::Selected); +  YamlMF.FailedISel = MF.getProperties().hasProperty( +      MachineFunctionProperties::Property::FailedISel);    convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());    ModuleSlotTracker MST(MF.getFunction().getParent()); @@ -259,6 +256,21 @@ static void printRegClassOrBank(unsigned Reg, yaml::StringValue &Dest,    OS << printRegClassOrBank(Reg, RegInfo, TRI);  } +template <typename T> +static void +printStackObjectDbgInfo(const MachineFunction::VariableDbgInfo &DebugVar, +                        T &Object, ModuleSlotTracker &MST) { +  std::array<std::string *, 3> Outputs{{&Object.DebugVar.Value, +                                        &Object.DebugExpr.Value, +                                        &Object.DebugLoc.Value}}; +  std::array<const Metadata *, 3> Metas{{DebugVar.Var, +                                        DebugVar.Expr, +                                        DebugVar.Loc}}; +  for (unsigned i = 0; i < 3; ++i) { +    raw_string_ostream StrOS(*Outputs[i]); +    Metas[i]->printAsOperand(StrOS, MST); +  } +}  void MIRPrinter::convert(yaml::MachineFunction &MF,                           const MachineRegisterInfo &RegInfo, @@ -270,6 +282,8 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,      unsigned Reg = TargetRegisterInfo::index2VirtReg(I);      yaml::VirtualRegisterDefinition VReg;      VReg.ID = I; +    if (RegInfo.getVRegName(Reg) != "") +      continue;      ::printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI);      unsigned PreferredReg = RegInfo.getSimpleHint(Reg);      if 
(PreferredReg) @@ -316,6 +330,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,    YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();    YamlMFI.HasVAStart = MFI.hasVAStart();    YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); +  YamlMFI.LocalFrameSize = MFI.getLocalFrameSize();    if (MFI.getSavePoint()) {      raw_string_ostream StrOS(YamlMFI.SavePoint.Value);      StrOS << printMBBReference(*MFI.getSavePoint()); @@ -421,19 +436,12 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,      assert(StackObjectInfo != StackObjectOperandMapping.end() &&             "Invalid stack object index");      const FrameIndexOperand &StackObject = StackObjectInfo->second; -    assert(!StackObject.IsFixed && "Expected a non-fixed stack object"); -    auto &Object = YMF.StackObjects[StackObject.ID]; -    { -      raw_string_ostream StrOS(Object.DebugVar.Value); -      DebugVar.Var->printAsOperand(StrOS, MST); -    } -    { -      raw_string_ostream StrOS(Object.DebugExpr.Value); -      DebugVar.Expr->printAsOperand(StrOS, MST); -    } -    { -      raw_string_ostream StrOS(Object.DebugLoc.Value); -      DebugVar.Loc->printAsOperand(StrOS, MST); +    if (StackObject.IsFixed) { +      auto &Object = YMF.FixedStackObjects[StackObject.ID]; +      printStackObjectDbgInfo(DebugVar, Object, MST); +    } else { +      auto &Object = YMF.StackObjects[StackObject.ID]; +      printStackObjectDbgInfo(DebugVar, Object, MST);      }    }  } @@ -670,6 +678,23 @@ void MIPrinter::print(const MachineInstr &MI) {      OS << " = ";    if (MI.getFlag(MachineInstr::FrameSetup))      OS << "frame-setup "; +  if (MI.getFlag(MachineInstr::FrameDestroy)) +    OS << "frame-destroy "; +  if (MI.getFlag(MachineInstr::FmNoNans)) +    OS << "nnan "; +  if (MI.getFlag(MachineInstr::FmNoInfs)) +    OS << "ninf "; +  if (MI.getFlag(MachineInstr::FmNsz)) +    OS << "nsz "; +  if (MI.getFlag(MachineInstr::FmArcp)) +    OS << "arcp "; +  if (MI.getFlag(MachineInstr::FmContract)) +    OS << "contract "; +  if (MI.getFlag(MachineInstr::FmAfn)) +    OS << "afn "; +  if (MI.getFlag(MachineInstr::FmReassoc)) +    OS << "reassoc "; +    OS << TII->getName(MI.getOpcode());    if (I < E)      OS << ' '; @@ -683,46 +708,27 @@ void MIPrinter::print(const MachineInstr &MI) {      NeedComma = true;    } -  if (MI.getDebugLoc()) { +  if (const DebugLoc &DL = MI.getDebugLoc()) {      if (NeedComma)        OS << ',';      OS << " debug-location "; -    MI.getDebugLoc()->printAsOperand(OS, MST); +    DL->printAsOperand(OS, MST);    }    if (!MI.memoperands_empty()) {      OS << " :: ";      const LLVMContext &Context = MF->getFunction().getContext(); +    const MachineFrameInfo &MFI = MF->getFrameInfo();      bool NeedComma = false;      for (const auto *Op : MI.memoperands()) {        if (NeedComma)          OS << ", "; -      print(Context, *TII, *Op); +      Op->print(OS, MST, SSNs, Context, &MFI, TII);        NeedComma = true;      }    }  } -void MIPrinter::printIRValueReference(const Value &V) { -  if (isa<GlobalValue>(V)) { -    V.printAsOperand(OS, /*PrintType=*/false, MST); -    return; -  } -  if (isa<Constant>(V)) { -    // Machine memory operands can load/store to/from constant value pointers. 
-    OS << '`'; -    V.printAsOperand(OS, /*PrintType=*/true, MST); -    OS << '`'; -    return; -  } -  OS << "%ir."; -  if (V.hasName()) { -    printLLVMNameWithoutPrefix(OS, V.getName()); -    return; -  } -  MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V)); -} -  void MIPrinter::printStackObjectReference(int FrameIndex) {    auto ObjectInfo = StackObjectOperandMapping.find(FrameIndex);    assert(ObjectInfo != StackObjectOperandMapping.end() && @@ -741,7 +747,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,    case MachineOperand::MO_Immediate:      if (MI.isOperandSubregIdx(OpIdx)) {        MachineOperand::printTargetFlags(OS, Op); -      MachineOperand::printSubregIdx(OS, Op.getImm(), TRI); +      MachineOperand::printSubRegIdx(OS, Op.getImm(), TRI);        break;      }      LLVM_FALLTHROUGH; @@ -765,8 +771,8 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,      if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())        TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);      const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo(); -    Op.print(OS, MST, TypeToPrint, PrintDef, ShouldPrintRegisterTies, -             TiedOperandIdx, TRI, TII); +    Op.print(OS, MST, TypeToPrint, PrintDef, /*IsStandalone=*/false, +             ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII);      break;    }    case MachineOperand::MO_FrameIndex: @@ -783,132 +789,6 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,    }  } -static const char *getTargetMMOFlagName(const TargetInstrInfo &TII, -                                        unsigned TMMOFlag) { -  auto Flags = TII.getSerializableMachineMemOperandTargetFlags(); -  for (const auto &I : Flags) { -    if (I.first == TMMOFlag) { -      return I.second; -    } -  } -  return nullptr; -} - -void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII, -                      const MachineMemOperand &Op) { -  OS << '('; -  if (Op.isVolatile()) -    OS << "volatile "; -  if (Op.isNonTemporal()) -    OS << "non-temporal "; -  if (Op.isDereferenceable()) -    OS << "dereferenceable "; -  if (Op.isInvariant()) -    OS << "invariant "; -  if (Op.getFlags() & MachineMemOperand::MOTargetFlag1) -    OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag1) -       << "\" "; -  if (Op.getFlags() & MachineMemOperand::MOTargetFlag2) -    OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag2) -       << "\" "; -  if (Op.getFlags() & MachineMemOperand::MOTargetFlag3) -    OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3) -       << "\" "; - -  assert((Op.isLoad() || Op.isStore()) && "machine memory operand must be a load or store (or both)"); -  if (Op.isLoad()) -    OS << "load "; -  if (Op.isStore()) -    OS << "store "; - -  printSyncScope(Context, Op.getSyncScopeID()); - -  if (Op.getOrdering() != AtomicOrdering::NotAtomic) -    OS << toIRString(Op.getOrdering()) << ' '; -  if (Op.getFailureOrdering() != AtomicOrdering::NotAtomic) -    OS << toIRString(Op.getFailureOrdering()) << ' '; - -  OS << Op.getSize(); -  if (const Value *Val = Op.getValue()) { -    OS << ((Op.isLoad() && Op.isStore()) ? " on " -                                         : Op.isLoad() ? " from " : " into "); -    printIRValueReference(*Val); -  } else if (const PseudoSourceValue *PVal = Op.getPseudoValue()) { -    OS << ((Op.isLoad() && Op.isStore()) ? 
" on " -                                         : Op.isLoad() ? " from " : " into "); -    assert(PVal && "Expected a pseudo source value"); -    switch (PVal->kind()) { -    case PseudoSourceValue::Stack: -      OS << "stack"; -      break; -    case PseudoSourceValue::GOT: -      OS << "got"; -      break; -    case PseudoSourceValue::JumpTable: -      OS << "jump-table"; -      break; -    case PseudoSourceValue::ConstantPool: -      OS << "constant-pool"; -      break; -    case PseudoSourceValue::FixedStack: -      printStackObjectReference( -          cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex()); -      break; -    case PseudoSourceValue::GlobalValueCallEntry: -      OS << "call-entry "; -      cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand( -          OS, /*PrintType=*/false, MST); -      break; -    case PseudoSourceValue::ExternalSymbolCallEntry: -      OS << "call-entry $"; -      printLLVMNameWithoutPrefix( -          OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol()); -      break; -    case PseudoSourceValue::TargetCustom: -      llvm_unreachable("TargetCustom pseudo source values are not supported"); -      break; -    } -  } -  MachineOperand::printOperandOffset(OS, Op.getOffset()); -  if (Op.getBaseAlignment() != Op.getSize()) -    OS << ", align " << Op.getBaseAlignment(); -  auto AAInfo = Op.getAAInfo(); -  if (AAInfo.TBAA) { -    OS << ", !tbaa "; -    AAInfo.TBAA->printAsOperand(OS, MST); -  } -  if (AAInfo.Scope) { -    OS << ", !alias.scope "; -    AAInfo.Scope->printAsOperand(OS, MST); -  } -  if (AAInfo.NoAlias) { -    OS << ", !noalias "; -    AAInfo.NoAlias->printAsOperand(OS, MST); -  } -  if (Op.getRanges()) { -    OS << ", !range "; -    Op.getRanges()->printAsOperand(OS, MST); -  } -  OS << ')'; -} - -void MIPrinter::printSyncScope(const LLVMContext &Context, SyncScope::ID SSID) { -  switch (SSID) { -  case SyncScope::System: { -    break; -  } -  default: { -    if (SSNs.empty()) -      Context.getSyncScopeNames(SSNs); - -    OS << "syncscope(\""; -    PrintEscapedString(SSNs[SSID], OS); -    OS << "\") "; -    break; -  } -  } -} -  void llvm::printMIR(raw_ostream &OS, const Module &M) {    yaml::Output Out(OS);    Out << const_cast<Module &>(M); diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index cd67449e3acf..38e8369dc739 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -24,6 +24,7 @@  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/DebugInfoMetadata.h" @@ -173,7 +174,7 @@ MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) {    const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();    iterator E = end(); -  while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue() || +  while (I != E && (I->isPHI() || I->isPosition() || I->isDebugInstr() ||                      TII->isBasicBlockPrologue(*I)))      ++I;    // FIXME: This needs to change if we wish to bundle labels / dbg_values @@ -186,7 +187,7 @@ MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) {  MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {    iterator B = begin(), E = end(), I = E; -  while (I != B && ((--I)->isTerminator() || 
I->isDebugValue())) +  while (I != B && ((--I)->isTerminator() || I->isDebugInstr()))      ; /*noop */    while (I != E && !I->isTerminator())      ++I; @@ -195,7 +196,7 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {  MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {    instr_iterator B = instr_begin(), E = instr_end(), I = E; -  while (I != B && ((--I)->isTerminator() || I->isDebugValue())) +  while (I != B && ((--I)->isTerminator() || I->isDebugInstr()))      ; /*noop */    while (I != E && !I->isTerminator())      ++I; @@ -213,7 +214,7 @@ MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {    while (I != B) {      --I;      // Return instruction that starts a bundle. -    if (I->isDebugValue() || I->isInsideBundle()) +    if (I->isDebugInstr() || I->isInsideBundle())        continue;      return I;    } @@ -259,8 +260,8 @@ std::string MachineBasicBlock::getFullName() const {    return Name;  } -void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes) -    const { +void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes, +                              bool IsStandalone) const {    const MachineFunction *MF = getParent();    if (!MF) {      OS << "Can't print out MachineBasicBlock because parent MachineFunction" @@ -270,11 +271,13 @@ void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes)    const Function &F = MF->getFunction();    const Module *M = F.getParent();    ModuleSlotTracker MST(M); -  print(OS, MST, Indexes); +  MST.incorporateFunction(F); +  print(OS, MST, Indexes, IsStandalone);  }  void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, -                              const SlotIndexes *Indexes) const { +                              const SlotIndexes *Indexes, +                              bool IsStandalone) const {    const MachineFunction *MF = getParent();    if (!MF) {      OS << "Can't print out MachineBasicBlock because parent MachineFunction" @@ -285,70 +288,143 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,    if (Indexes)      OS << Indexes->getMBBStartIdx(this) << '\t'; -  OS << printMBBReference(*this) << ": "; - -  const char *Comma = ""; -  if (const BasicBlock *LBB = getBasicBlock()) { -    OS << Comma << "derived from LLVM BB "; -    LBB->printAsOperand(OS, /*PrintType=*/false, MST); -    Comma = ", "; +  OS << "bb." << getNumber(); +  bool HasAttributes = false; +  if (const auto *BB = getBasicBlock()) { +    if (BB->hasName()) { +      OS << "." << BB->getName(); +    } else { +      HasAttributes = true; +      OS << " ("; +      int Slot = MST.getLocalSlot(BB); +      if (Slot == -1) +        OS << "<ir-block badref>"; +      else +        OS << (Twine("%ir-block.") + Twine(Slot)).str(); +    }    } -  if (isEHPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } -  if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } -  if (Alignment) -    OS << Comma << "Align " << Alignment << " (" << (1u << Alignment) -       << " bytes)"; -  OS << '\n'; +  if (hasAddressTaken()) { +    OS << (HasAttributes ? ", " : " ("); +    OS << "address-taken"; +    HasAttributes = true; +  } +  if (isEHPad()) { +    OS << (HasAttributes ? ", " : " ("); +    OS << "landing-pad"; +    HasAttributes = true; +  } +  if (getAlignment()) { +    OS << (HasAttributes ? 
", " : " ("); +    OS << "align " << getAlignment(); +    HasAttributes = true; +  } +  if (HasAttributes) +    OS << ")"; +  OS << ":\n";    const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); -  if (!livein_empty()) { +  const MachineRegisterInfo &MRI = MF->getRegInfo(); +  const TargetInstrInfo &TII = *getParent()->getSubtarget().getInstrInfo(); +  bool HasLineAttributes = false; + +  // Print the preds of this block according to the CFG. +  if (!pred_empty() && IsStandalone) {      if (Indexes) OS << '\t'; -    OS << "    Live Ins:"; -    for (const auto &LI : LiveIns) { -      OS << ' ' << printReg(LI.PhysReg, TRI); -      if (!LI.LaneMask.all()) -        OS << ':' << PrintLaneMask(LI.LaneMask); +    // Don't indent(2), align with previous line attributes. +    OS << "; predecessors: "; +    for (auto I = pred_begin(), E = pred_end(); I != E; ++I) { +      if (I != pred_begin()) +        OS << ", "; +      OS << printMBBReference(**I);      }      OS << '\n'; +    HasLineAttributes = true;    } -  // Print the preds of this block according to the CFG. -  if (!pred_empty()) { + +  if (!succ_empty()) {      if (Indexes) OS << '\t'; -    OS << "    Predecessors according to CFG:"; -    for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) -      OS << " " << printMBBReference(*(*PI)); +    // Print the successors +    OS.indent(2) << "successors: "; +    for (auto I = succ_begin(), E = succ_end(); I != E; ++I) { +      if (I != succ_begin()) +        OS << ", "; +      OS << printMBBReference(**I); +      if (!Probs.empty()) +        OS << '(' +           << format("0x%08" PRIx32, getSuccProbability(I).getNumerator()) +           << ')'; +    } +    if (!Probs.empty() && IsStandalone) { +      // Print human readable probabilities as comments. +      OS << "; "; +      for (auto I = succ_begin(), E = succ_end(); I != E; ++I) { +        const BranchProbability &BP = *getProbabilityIterator(I); +        if (I != succ_begin()) +          OS << ", "; +        OS << printMBBReference(**I) << '(' +           << format("%.2f%%", +                     rint(((double)BP.getNumerator() / BP.getDenominator()) * +                          100.0 * 100.0) / +                         100.0) +           << ')'; +      } +    } +      OS << '\n'; +    HasLineAttributes = true;    } -  for (auto &I : instrs()) { +  if (!livein_empty() && MRI.tracksLiveness()) { +    if (Indexes) OS << '\t'; +    OS.indent(2) << "liveins: "; + +    bool First = true; +    for (const auto &LI : liveins()) { +      if (!First) +        OS << ", "; +      First = false; +      OS << printReg(LI.PhysReg, TRI); +      if (!LI.LaneMask.all()) +        OS << ":0x" << PrintLaneMask(LI.LaneMask); +    } +    HasLineAttributes = true; +  } + +  if (HasLineAttributes) +    OS << '\n'; + +  bool IsInBundle = false; +  for (const MachineInstr &MI : instrs()) {      if (Indexes) { -      if (Indexes->hasIndex(I)) -        OS << Indexes->getInstructionIndex(I); +      if (Indexes->hasIndex(MI)) +        OS << Indexes->getInstructionIndex(MI);        OS << '\t';      } -    OS << '\t'; -    if (I.isInsideBundle()) -      OS << "  * "; -    I.print(OS, MST); -  } -  // Print the successors of this block according to the CFG. 
-  if (!succ_empty()) { -    if (Indexes) OS << '\t'; -    OS << "    Successors according to CFG:"; -    for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { -      OS << " " << printMBBReference(*(*SI)); -      if (!Probs.empty()) -        OS << '(' << *getProbabilityIterator(SI) << ')'; +    if (IsInBundle && !MI.isInsideBundle()) { +      OS.indent(2) << "}\n"; +      IsInBundle = false; +    } + +    OS.indent(IsInBundle ? 4 : 2); +    MI.print(OS, MST, IsStandalone, /*SkipOpers=*/false, /*SkipDebugLoc=*/false, +             /*AddNewLine=*/false, &TII); + +    if (!IsInBundle && MI.getFlag(MachineInstr::BundledSucc)) { +      OS << " {"; +      IsInBundle = true;      }      OS << '\n';    } -  if (IrrLoopHeaderWeight) { + +  if (IsInBundle) +    OS.indent(2) << "}\n"; + +  if (IrrLoopHeaderWeight && IsStandalone) {      if (Indexes) OS << '\t'; -    OS << "    Irreducible loop header weight: " -       << IrrLoopHeaderWeight.getValue(); -    OS << '\n'; +    OS.indent(2) << "; Irreducible loop header weight: " +                 << IrrLoopHeaderWeight.getValue() << '\n';    }  } @@ -382,10 +458,10 @@ bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {  }  void MachineBasicBlock::sortUniqueLiveIns() { -  std::sort(LiveIns.begin(), LiveIns.end(), -            [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) { -              return LI0.PhysReg < LI1.PhysReg; -            }); +  llvm::sort(LiveIns.begin(), LiveIns.end(), +             [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) { +               return LI0.PhysReg < LI1.PhysReg; +             });    // Liveins are sorted by physreg now we can merge their lanemasks.    LiveInVector::const_iterator I = LiveIns.begin();    LiveInVector::const_iterator J; @@ -583,6 +659,25 @@ void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) {    Succ->addPredecessor(this);  } +void MachineBasicBlock::splitSuccessor(MachineBasicBlock *Old, +                                       MachineBasicBlock *New, +                                       bool NormalizeSuccProbs) { +  succ_iterator OldI = llvm::find(successors(), Old); +  assert(OldI != succ_end() && "Old is not a successor of this block!"); +  assert(llvm::find(successors(), New) == succ_end() && +         "New is already a successor of this block!"); + +  // Add a new successor with equal probability as the original one. Note +  // that we directly copy the probability using the iterator rather than +  // getting a potentially synthetic probability computed when unknown. This +  // preserves the probabilities as-is and then we can renormalize them and +  // query them effectively afterward. +  addSuccessor(New, Probs.empty() ? 
BranchProbability::getUnknown()
+                                  : *getProbabilityIterator(OldI));
+  if (NormalizeSuccProbs)
+    normalizeSuccProbs();
+}
+
 void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ,
                                         bool NormalizeSuccProbs) {
   succ_iterator I = find(Successors, Succ);
@@ -779,9 +874,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
   MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
   MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
-  DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this)
-               << " -- " << printMBBReference(*NMBB) << " -- "
-               << printMBBReference(*Succ) << '\n');
+  LLVM_DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this)
+                    << " -- " << printMBBReference(*NMBB) << " -- "
+                    << printMBBReference(*Succ) << '\n');
 
   LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>();
   SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>();
@@ -810,7 +905,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
         if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
             LV->getVarInfo(Reg).removeKill(*MI)) {
           KilledRegs.push_back(Reg);
-          DEBUG(dbgs() << "Removing terminator kill: " << *MI);
+          LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI);
           OI->setIsKill(false);
         }
       }
@@ -901,7 +996,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
           continue;
         if (TargetRegisterInfo::isVirtualRegister(Reg))
           LV->getVarInfo(Reg).Kills.push_back(&*I);
-        DEBUG(dbgs() << "Restored terminator kill: " << *I);
+        LLVM_DEBUG(dbgs() << "Restored terminator kill: " << *I);
         break;
       }
     }
@@ -1034,8 +1129,8 @@ bool MachineBasicBlock::canSplitCriticalEdge(
   // case that we can't handle. Since this never happens in properly optimized
   // code, just skip those edges.
   if (TBB && TBB == FBB) {
-    DEBUG(dbgs() << "Won't split critical edge after degenerate "
-                 << printMBBReference(*this) << '\n');
+    LLVM_DEBUG(dbgs() << "Won't split critical edge after degenerate "
+                      << printMBBReference(*this) << '\n');
     return false;
   }
   return true;
@@ -1189,6 +1284,16 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
   return {};
 }
 
+/// Find the previous valid DebugLoc preceding MBBI, skipping any DBG_VALUE
+/// instructions.  Return UnknownLoc if there is none.
+DebugLoc MachineBasicBlock::findPrevDebugLoc(instr_iterator MBBI) {
+  if (MBBI == instr_begin()) return {};
+  // Skip debug declarations, we don't want a DebugLoc from them.
+  MBBI = skipDebugInstructionsBackward(std::prev(MBBI), instr_begin());
+  if (!MBBI->isDebugInstr()) return MBBI->getDebugLoc();
+  return {};
+}
+
 /// Find and return the merged DebugLoc of the branch instructions of the block.
 /// Return UnknownLoc if there is none.
 DebugLoc
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 167135b56ec0..21350df624e7 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -198,10 +198,10 @@ namespace {
 class BlockChain;
 
-/// \brief Type for our function-wide basic block -> block chain mapping.
+/// Type for our function-wide basic block -> block chain mapping.  using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>; -/// \brief A chain of blocks which will be laid out contiguously. +/// A chain of blocks which will be laid out contiguously.  ///  /// This is the datastructure representing a chain of consecutive blocks that  /// are profitable to layout together in order to maximize fallthrough @@ -213,13 +213,13 @@ using BlockToChainMapType = DenseMap<const MachineBasicBlock *, BlockChain *>;  /// them. They participate in a block-to-chain mapping, which is updated  /// automatically as chains are merged together.  class BlockChain { -  /// \brief The sequence of blocks belonging to this chain. +  /// The sequence of blocks belonging to this chain.    ///    /// This is the sequence of blocks for a particular chain. These will be laid    /// out in-order within the function.    SmallVector<MachineBasicBlock *, 4> Blocks; -  /// \brief A handle to the function-wide basic block to block chain mapping. +  /// A handle to the function-wide basic block to block chain mapping.    ///    /// This is retained in each block chain to simplify the computation of child    /// block chains for SCC-formation and iteration. We store the edges to child @@ -228,7 +228,7 @@ class BlockChain {    BlockToChainMapType &BlockToChain;  public: -  /// \brief Construct a new BlockChain. +  /// Construct a new BlockChain.    ///    /// This builds a new block chain representing a single basic block in the    /// function. It also registers itself as the chain that block participates @@ -239,15 +239,15 @@ public:      BlockToChain[BB] = this;    } -  /// \brief Iterator over blocks within the chain. +  /// Iterator over blocks within the chain.    using iterator = SmallVectorImpl<MachineBasicBlock *>::iterator;    using const_iterator = SmallVectorImpl<MachineBasicBlock *>::const_iterator; -  /// \brief Beginning of blocks within the chain. +  /// Beginning of blocks within the chain.    iterator begin() { return Blocks.begin(); }    const_iterator begin() const { return Blocks.begin(); } -  /// \brief End of blocks within the chain. +  /// End of blocks within the chain.    iterator end() { return Blocks.end(); }    const_iterator end() const { return Blocks.end(); } @@ -261,7 +261,7 @@ public:      return false;    } -  /// \brief Merge a block chain into this one. +  /// Merge a block chain into this one.    ///    /// This routine merges a block chain into this one. It takes care of forming    /// a contiguous sequence of basic blocks, updating the edge list, and @@ -293,14 +293,14 @@ public:    }  #ifndef NDEBUG -  /// \brief Dump the blocks in this chain. +  /// Dump the blocks in this chain.    LLVM_DUMP_METHOD void dump() {      for (MachineBasicBlock *MBB : *this)        MBB->dump();    }  #endif // NDEBUG -  /// \brief Count of predecessors of any block within the chain which have not +  /// Count of predecessors of any block within the chain which have not    /// yet been scheduled.  In general, we will delay scheduling this chain    /// until those predecessors are scheduled (or we find a sufficiently good    /// reason to override this heuristic.)  Note that when forming loop chains, @@ -313,7 +313,7 @@ public:  };  class MachineBlockPlacement : public MachineFunctionPass { -  /// \brief A type for a block filter set. +  /// A type for a block filter set.    
   using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
 
   /// Pair struct containing basic block and taildup profitability
@@ -329,47 +329,47 @@ class MachineBlockPlacement : public MachineFunctionPass {
     MachineBasicBlock *Dest;
   };
 
-  /// \brief work lists of blocks that are ready to be laid out
+  /// work lists of blocks that are ready to be laid out
   SmallVector<MachineBasicBlock *, 16> BlockWorkList;
   SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
 
   /// Edges that have already been computed as optimal.
   DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
 
-  /// \brief Machine Function
+  /// Machine Function
   MachineFunction *F;
 
-  /// \brief A handle to the branch probability pass.
+  /// A handle to the branch probability pass.
   const MachineBranchProbabilityInfo *MBPI;
 
-  /// \brief A handle to the function-wide block frequency pass.
+  /// A handle to the function-wide block frequency pass.
   std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
 
-  /// \brief A handle to the loop info.
+  /// A handle to the loop info.
   MachineLoopInfo *MLI;
 
-  /// \brief Preferred loop exit.
+  /// Preferred loop exit.
   /// Member variable for convenience. It may be removed by duplication deep
   /// in the call stack.
   MachineBasicBlock *PreferredLoopExit;
 
-  /// \brief A handle to the target's instruction info.
+  /// A handle to the target's instruction info.
   const TargetInstrInfo *TII;
 
-  /// \brief A handle to the target's lowering info.
+  /// A handle to the target's lowering info.
   const TargetLoweringBase *TLI;
 
-  /// \brief A handle to the post dominator tree.
+  /// A handle to the post dominator tree.
   MachinePostDominatorTree *MPDT;
 
-  /// \brief Duplicator used to duplicate tails during placement.
+  /// Duplicator used to duplicate tails during placement.
   ///
   /// Placement decisions can open up new tail duplication opportunities, but
   /// since tail duplication affects placement decisions of later blocks, it
   /// must be done inline.
   TailDuplicator TailDup;
 
-  /// \brief Allocator and owner of BlockChain structures.
+  /// Allocator and owner of BlockChain structures.
   ///
   /// We build BlockChains lazily while processing the loop structure of
   /// a function. To reduce malloc traffic, we allocate them using this
@@ -378,7 +378,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
   /// the chains.
   SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
 
-  /// \brief Function wide BasicBlock to BlockChain mapping.
+  /// Function wide BasicBlock to BlockChain mapping.
   ///
   /// This mapping allows efficiently moving from any given basic block to the
   /// BlockChain it participates in, if any. We use it to, among other things,
@@ -425,7 +425,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
       MachineBasicBlock *BB, MachineBasicBlock *LPred,
       BlockChain &Chain, BlockFilterSet *BlockFilter,
       MachineFunction::iterator &PrevUnplacedBlockIt,
-      bool &DuplicatedToPred);
+      bool &DuplicatedToLPred);
   bool hasBetterLayoutPredecessor(
       const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
       const BlockChain &SuccChain, BranchProbability SuccProb,
@@ -441,7 +441,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
       MachineFunction::iterator &PrevUnplacedBlockIt,
       const BlockFilterSet *BlockFilter);
 
-  /// \brief Add a basic block to the work list if it is appropriate.
+  /// Add a basic block to the work list if it is appropriate.    ///    /// If the optional parameter BlockFilter is provided, only MBB    /// present in the set will be added to the worklist. If nullptr @@ -474,7 +474,7 @@ class MachineBlockPlacement : public MachineFunctionPass {    /// fallthroughs.    bool isProfitableToTailDup(      const MachineBasicBlock *BB, const MachineBasicBlock *Succ, -    BranchProbability AdjustedSumProb, +    BranchProbability QProb,      const BlockChain &Chain, const BlockFilterSet *BlockFilter);    /// Check for a trellis layout. @@ -545,7 +545,7 @@ INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,                      "Branch Probability Basic Block Placement", false, false)  #ifndef NDEBUG -/// \brief Helper to print the name of a MBB. +/// Helper to print the name of a MBB.  ///  /// Only used by debug logging.  static std::string getBlockName(const MachineBasicBlock *BB) { @@ -558,7 +558,7 @@ static std::string getBlockName(const MachineBasicBlock *BB) {  }  #endif -/// \brief Mark a chain's successors as having one fewer preds. +/// Mark a chain's successors as having one fewer preds.  ///  /// When a chain is being merged into the "placed" chain, this routine will  /// quickly walk the successors of each block in the chain and mark them as @@ -574,7 +574,7 @@ void MachineBlockPlacement::markChainSuccessors(    }  } -/// \brief Mark a single block's successors as having one fewer preds. +/// Mark a single block's successors as having one fewer preds.  ///  /// Under normal circumstances, this is only called by markChainSuccessors,  /// but if a block that was to be placed is completely tail-duplicated away, @@ -643,7 +643,8 @@ BranchProbability MachineBlockPlacement::collectViableSuccessors(        if (SuccChain == &Chain) {          SkipSucc = true;        } else if (Succ != *SuccChain->begin()) { -        DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> Mid chain!\n"); +        LLVM_DEBUG(dbgs() << "    " << getBlockName(Succ) +                          << " -> Mid chain!\n");          continue;        }      } @@ -1010,7 +1011,7 @@ MachineBlockPlacement::getBestTrellisSuccessor(      // If we have a trellis, and BB doesn't have the best fallthrough edges,      // we shouldn't choose any successor. We've already looked and there's a      // better fallthrough edge for all the successors. 
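    // For intuition, a sketch of the trellis shape being handled here; the
    // block names below are illustrative only, not taken from the pass:
    //
    //    B1      B2       A trellis: B1 and B2 share the successor set
    //    | \    / |       {S1, S2}. Making B1->S1 a fallthrough forces B2
    //    |  \  /  |       to jump to S2 (and vice versa), so the edges are
    //    |   \/   |       scored as pairings, and BB only receives a layout
    //    |   /\   |       successor when one of the chosen pairings starts
    //    |  /  \  |       at BB itself.
    //    S1      S2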
-    DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n"); +    LLVM_DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");      return Result;    } @@ -1027,10 +1028,11 @@ MachineBlockPlacement::getBestTrellisSuccessor(          canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&          isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),                                Chain, BlockFilter)) { -      DEBUG(BranchProbability Succ2Prob = getAdjustedProbability( -                MBPI->getEdgeProbability(BB, Succ2), AdjustedSumProb); -            dbgs() << "    Selected: " << getBlockName(Succ2) -                   << ", probability: " << Succ2Prob << " (Tail Duplicate)\n"); +      LLVM_DEBUG(BranchProbability Succ2Prob = getAdjustedProbability( +                     MBPI->getEdgeProbability(BB, Succ2), AdjustedSumProb); +                 dbgs() << "    Selected: " << getBlockName(Succ2) +                        << ", probability: " << Succ2Prob +                        << " (Tail Duplicate)\n");        Result.BB = Succ2;        Result.ShouldTailDup = true;        return Result; @@ -1041,10 +1043,10 @@ MachineBlockPlacement::getBestTrellisSuccessor(    ComputedEdges[BestB.Src] = { BestB.Dest, false };    auto TrellisSucc = BestA.Dest; -  DEBUG(BranchProbability SuccProb = getAdjustedProbability( -            MBPI->getEdgeProbability(BB, TrellisSucc), AdjustedSumProb); -        dbgs() << "    Selected: " << getBlockName(TrellisSucc) -               << ", probability: " << SuccProb << " (Trellis)\n"); +  LLVM_DEBUG(BranchProbability SuccProb = getAdjustedProbability( +                 MBPI->getEdgeProbability(BB, TrellisSucc), AdjustedSumProb); +             dbgs() << "    Selected: " << getBlockName(TrellisSucc) +                    << ", probability: " << SuccProb << " (Trellis)\n");    Result.BB = TrellisSucc;    return Result;  } @@ -1150,7 +1152,7 @@ void MachineBlockPlacement::precomputeTriangleChains() {    if (TriangleChainCount == 0)      return; -  DEBUG(dbgs() << "Pre-computing triangle chains.\n"); +  LLVM_DEBUG(dbgs() << "Pre-computing triangle chains.\n");    // Map from last block to the chain that contains it. This allows us to extend    // chains as we find new triangles.    DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap; @@ -1224,8 +1226,9 @@ void MachineBlockPlacement::precomputeTriangleChains() {      MachineBasicBlock *dst = Chain.Edges.back();      Chain.Edges.pop_back();      for (MachineBasicBlock *src : reverse(Chain.Edges)) { -      DEBUG(dbgs() << "Marking edge: " << getBlockName(src) << "->" << -            getBlockName(dst) << " as pre-computed based on triangles.\n"); +      LLVM_DEBUG(dbgs() << "Marking edge: " << getBlockName(src) << "->" +                        << getBlockName(dst) +                        << " as pre-computed based on triangles.\n");        auto InsertResult = ComputedEdges.insert({src, {dst, true}});        assert(InsertResult.second && "Block seen twice."); @@ -1431,15 +1434,15 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(    }    if (BadCFGConflict) { -    DEBUG(dbgs() << "    Not a candidate: " << getBlockName(Succ) << " -> " << SuccProb -                 << " (prob) (non-cold CFG conflict)\n"); +    LLVM_DEBUG(dbgs() << "    Not a candidate: " << getBlockName(Succ) << " -> " +                      << SuccProb << " (prob) (non-cold CFG conflict)\n");      return true;    }    return false;  } -/// \brief Select the best successor for a block. 
+/// Select the best successor for a block.  ///  /// This looks across all successors of a particular block and attempts to  /// select the "best" one to be the layout successor. It only considers direct @@ -1462,7 +1465,8 @@ MachineBlockPlacement::selectBestSuccessor(    auto AdjustedSumProb =        collectViableSuccessors(BB, Chain, BlockFilter, Successors); -  DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n"); +  LLVM_DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) +                    << "\n");    // if we already precomputed the best successor for BB, return that if still    // applicable. @@ -1503,18 +1507,18 @@ MachineBlockPlacement::selectBestSuccessor(        continue;      } -    DEBUG( -        dbgs() << "    Candidate: " << getBlockName(Succ) << ", probability: " -               << SuccProb +    LLVM_DEBUG( +        dbgs() << "    Candidate: " << getBlockName(Succ) +               << ", probability: " << SuccProb                 << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")                 << "\n");      if (BestSucc.BB && BestProb >= SuccProb) { -      DEBUG(dbgs() << "    Not the best candidate, continuing\n"); +      LLVM_DEBUG(dbgs() << "    Not the best candidate, continuing\n");        continue;      } -    DEBUG(dbgs() << "    Setting it as best candidate\n"); +    LLVM_DEBUG(dbgs() << "    Setting it as best candidate\n");      BestSucc.BB = Succ;      BestProb = SuccProb;    } @@ -1539,10 +1543,9 @@ MachineBlockPlacement::selectBestSuccessor(        break;      if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter)          && (isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter))) { -      DEBUG( -          dbgs() << "    Candidate: " << getBlockName(Succ) << ", probability: " -                 << DupProb -                 << " (Tail Duplicate)\n"); +      LLVM_DEBUG(dbgs() << "    Candidate: " << getBlockName(Succ) +                        << ", probability: " << DupProb +                        << " (Tail Duplicate)\n");        BestSucc.BB = Succ;        BestSucc.ShouldTailDup = true;        break; @@ -1550,12 +1553,12 @@ MachineBlockPlacement::selectBestSuccessor(    }    if (BestSucc.BB) -    DEBUG(dbgs() << "    Selected: " << getBlockName(BestSucc.BB) << "\n"); +    LLVM_DEBUG(dbgs() << "    Selected: " << getBlockName(BestSucc.BB) << "\n");    return BestSucc;  } -/// \brief Select the best block from a worklist. +/// Select the best block from a worklist.  ///  /// This looks through the provided worklist as a list of candidate basic  /// blocks and select the most profitable one to place. The definition of @@ -1596,8 +1599,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(             "Found CFG-violating block");      BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); -    DEBUG(dbgs() << "    " << getBlockName(MBB) << " -> "; -          MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); +    LLVM_DEBUG(dbgs() << "    " << getBlockName(MBB) << " -> "; +               MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");      // For ehpad, we layout the least probable first as to avoid jumping back      // from least probable landingpads to more probable ones. @@ -1627,7 +1630,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(    return BestBlock;  } -/// \brief Retrieve the first unplaced basic block. +/// Retrieve the first unplaced basic block.  
///  /// This routine is called when we are unable to use the CFG to walk through  /// all of the basic blocks and form a chain due to unnatural loops in the CFG. @@ -1723,8 +1726,8 @@ void MachineBlockPlacement::buildChain(        if (!BestSucc)          break; -      DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the " -                      "layout successor until the CFG reduces\n"); +      LLVM_DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the " +                           "layout successor until the CFG reduces\n");      }      // Placement may have changed tail duplication opportunities. @@ -1743,18 +1746,18 @@ void MachineBlockPlacement::buildChain(      // Zero out UnscheduledPredecessors for the successor we're about to merge in case      // we selected a successor that didn't fit naturally into the CFG.      SuccChain.UnscheduledPredecessors = 0; -    DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to " -                 << getBlockName(BestSucc) << "\n"); +    LLVM_DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to " +                      << getBlockName(BestSucc) << "\n");      markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);      Chain.merge(BestSucc, &SuccChain);      BB = *std::prev(Chain.end());    } -  DEBUG(dbgs() << "Finished forming chain for header block " -               << getBlockName(*Chain.begin()) << "\n"); +  LLVM_DEBUG(dbgs() << "Finished forming chain for header block " +                    << getBlockName(*Chain.begin()) << "\n");  } -/// \brief Find the best loop top block for layout. +/// Find the best loop top block for layout.  ///  /// Look for a block which is strictly better than the loop header for laying  /// out at the top of the loop. This looks for one and only one pattern: @@ -1784,17 +1787,17 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,    if (!LoopBlockSet.count(*HeaderChain.begin()))      return L.getHeader(); -  DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader()) -               << "\n"); +  LLVM_DEBUG(dbgs() << "Finding best loop top for: " +                    << getBlockName(L.getHeader()) << "\n");    BlockFrequency BestPredFreq;    MachineBasicBlock *BestPred = nullptr;    for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {      if (!LoopBlockSet.count(Pred))        continue; -    DEBUG(dbgs() << "    header pred: " << getBlockName(Pred) << ", has " -                 << Pred->succ_size() << " successors, "; -          MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); +    LLVM_DEBUG(dbgs() << "    header pred: " << getBlockName(Pred) << ", has " +                      << Pred->succ_size() << " successors, "; +               MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");      if (Pred->succ_size() > 1)        continue; @@ -1809,7 +1812,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,    // If no direct predecessor is fine, just use the loop header.    
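   // (Illustrative sketch with hypothetical blocks: for a header H with an
   // in-loop predecessor P whose only successor is H, placing P above H
   // turns the backedge P->H into a fallthrough:
   //
   //   before:   H: ...            after:   P: ...   ; falls through
   //             ...                        H: ...
   //             P: jmp H
   //
   // BestPred tracks the most frequent such single-successor predecessor.)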
if (!BestPred) { -    DEBUG(dbgs() << "    final top unchanged\n"); +    LLVM_DEBUG(dbgs() << "    final top unchanged\n");      return L.getHeader();    } @@ -1819,11 +1822,11 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,           *BestPred->pred_begin() != L.getHeader())      BestPred = *BestPred->pred_begin(); -  DEBUG(dbgs() << "    final top: " << getBlockName(BestPred) << "\n"); +  LLVM_DEBUG(dbgs() << "    final top: " << getBlockName(BestPred) << "\n");    return BestPred;  } -/// \brief Find the best loop exiting block for layout. +/// Find the best loop exiting block for layout.  ///  /// This routine implements the logic to analyze the loop looking for the best  /// block to layout at the top of the loop. Typically this is done to maximize @@ -1851,8 +1854,8 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,    // blocks where rotating to exit with that block will reach an outer loop.    SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop; -  DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader()) -               << "\n"); +  LLVM_DEBUG(dbgs() << "Finding best loop exit for: " +                    << getBlockName(L.getHeader()) << "\n");    for (MachineBasicBlock *MBB : L.getBlocks()) {      BlockChain &Chain = *BlockToChain[MBB];      // Ensure that this block is at the end of a chain; otherwise it could be @@ -1875,15 +1878,15 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,        BlockChain &SuccChain = *BlockToChain[Succ];        // Don't split chains, either this chain or the successor's chain.        if (&Chain == &SuccChain) { -        DEBUG(dbgs() << "    exiting: " << getBlockName(MBB) << " -> " -                     << getBlockName(Succ) << " (chain conflict)\n"); +        LLVM_DEBUG(dbgs() << "    exiting: " << getBlockName(MBB) << " -> " +                          << getBlockName(Succ) << " (chain conflict)\n");          continue;        }        auto SuccProb = MBPI->getEdgeProbability(MBB, Succ);        if (LoopBlockSet.count(Succ)) { -        DEBUG(dbgs() << "    looping: " << getBlockName(MBB) << " -> " -                     << getBlockName(Succ) << " (" << SuccProb << ")\n"); +        LLVM_DEBUG(dbgs() << "    looping: " << getBlockName(MBB) << " -> " +                          << getBlockName(Succ) << " (" << SuccProb << ")\n");          HasLoopingSucc = true;          continue;        } @@ -1896,9 +1899,10 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,        }        BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; -      DEBUG(dbgs() << "    exiting: " << getBlockName(MBB) << " -> " -                   << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("; -            MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); +      LLVM_DEBUG(dbgs() << "    exiting: " << getBlockName(MBB) << " -> " +                        << getBlockName(Succ) << " [L:" << SuccLoopDepth +                        << "] ("; +                 MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");        // Note that we bias this toward an existing layout successor to retain        // incoming order in the absence of better information. The exit must have        // a frequency higher than the current exit before we consider breaking @@ -1922,11 +1926,12 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,    // Without a candidate exiting block or with only a single block in the    // loop, just use the loop header to layout the loop.    
if (!ExitingBB) { -    DEBUG(dbgs() << "    No other candidate exit blocks, using loop header\n"); +    LLVM_DEBUG( +        dbgs() << "    No other candidate exit blocks, using loop header\n");      return nullptr;    }    if (L.getNumBlocks() == 1) { -    DEBUG(dbgs() << "    Loop has 1 block, using loop header as exit\n"); +    LLVM_DEBUG(dbgs() << "    Loop has 1 block, using loop header as exit\n");      return nullptr;    } @@ -1937,11 +1942,12 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,        !BlocksExitingToOuterLoop.count(ExitingBB))      return nullptr; -  DEBUG(dbgs() << "  Best exiting block: " << getBlockName(ExitingBB) << "\n"); +  LLVM_DEBUG(dbgs() << "  Best exiting block: " << getBlockName(ExitingBB) +                    << "\n");    return ExitingBB;  } -/// \brief Attempt to rotate an exiting block to the bottom of the loop. +/// Attempt to rotate an exiting block to the bottom of the loop.  ///  /// Once we have built a chain, try to rotate it to line up the hot exit block  /// with fallthrough out of the loop if doing so doesn't introduce unnecessary @@ -2014,12 +2020,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,          return;    } -  DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB) -               << " at bottom\n"); +  LLVM_DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB) +                    << " at bottom\n");    std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());  } -/// \brief Attempt to rotate a loop based on profile data to reduce branch cost. +/// Attempt to rotate a loop based on profile data to reduce branch cost.  ///  /// With profile data, we can determine the cost in terms of missed fall through  /// opportunities when rotating a loop chain and select the best rotation. @@ -2150,8 +2156,9 @@ void MachineBlockPlacement::rotateLoopWithProfile(        }      } -    DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockName(*Iter) -                 << " to the top: " << Cost.getFrequency() << "\n"); +    LLVM_DEBUG(dbgs() << "The cost of loop rotation by making " +                      << getBlockName(*Iter) +                      << " to the top: " << Cost.getFrequency() << "\n");      if (Cost < SmallestRotationCost) {        SmallestRotationCost = Cost; @@ -2160,13 +2167,13 @@ void MachineBlockPlacement::rotateLoopWithProfile(    }    if (RotationPos != LoopChain.end()) { -    DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos) -                 << " to the top\n"); +    LLVM_DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos) +                      << " to the top\n");      std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());    }  } -/// \brief Collect blocks in the given loop that are to be placed. +/// Collect blocks in the given loop that are to be placed.  ///  /// When profile data is available, exclude cold blocks from the returned set;  /// otherwise, collect all blocks in the loop. @@ -2202,7 +2209,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {    return LoopBlockSet;  } -/// \brief Forms basic block chains from the natural loop structures. +/// Forms basic block chains from the natural loop structures.  ///  /// These chains are designed to preserve the existing *structure* of the code  /// as much as possible. 
We can then stitch the chains together in a way which @@ -2265,7 +2272,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {    else      rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet); -  DEBUG({ +  LLVM_DEBUG({      // Crash at the end so we get all of the debugging output first.      bool BadLoop = false;      if (LoopChain.UnscheduledPredecessors) { @@ -2324,9 +2331,9 @@ void MachineBlockPlacement::buildCFGChains() {        // Ensure that the layout successor is a viable block, as we know that        // fallthrough is a possibility.        assert(NextFI != FE && "Can't fallthrough past the last block."); -      DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " -                   << getBlockName(BB) << " -> " << getBlockName(NextBB) -                   << "\n"); +      LLVM_DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " +                        << getBlockName(BB) << " -> " << getBlockName(NextBB) +                        << "\n");        Chain->merge(NextBB, nullptr);  #ifndef NDEBUG        BlocksWithUnanalyzableExits.insert(&*BB); @@ -2356,7 +2363,7 @@ void MachineBlockPlacement::buildCFGChains() {  #ifndef NDEBUG    using FunctionBlockSetType = SmallPtrSet<MachineBasicBlock *, 16>;  #endif -  DEBUG({ +  LLVM_DEBUG({      // Crash at the end so we get all of the debugging output first.      bool BadFunc = false;      FunctionBlockSetType FunctionBlockSet; @@ -2381,11 +2388,11 @@ void MachineBlockPlacement::buildCFGChains() {    // Splice the blocks into place.    MachineFunction::iterator InsertPos = F->begin(); -  DEBUG(dbgs() << "[MBP] Function: "<< F->getName() << "\n"); +  LLVM_DEBUG(dbgs() << "[MBP] Function: " << F->getName() << "\n");    for (MachineBasicBlock *ChainBB : FunctionChain) { -    DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain " -                                                       : "          ... ") -                 << getBlockName(ChainBB) << "\n"); +    LLVM_DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain " +                                                            : "          ... 
") +                      << getBlockName(ChainBB) << "\n");      if (InsertPos != MachineFunction::iterator(ChainBB))        F->splice(InsertPos, ChainBB);      else @@ -2470,11 +2477,11 @@ void MachineBlockPlacement::optimizeBranches() {            MBPI->getEdgeProbability(ChainBB, FBB) >                MBPI->getEdgeProbability(ChainBB, TBB) &&            !TII->reverseBranchCondition(Cond)) { -        DEBUG(dbgs() << "Reverse order of the two branches: " -                     << getBlockName(ChainBB) << "\n"); -        DEBUG(dbgs() << "    Edge probability: " -                     << MBPI->getEdgeProbability(ChainBB, FBB) << " vs " -                     << MBPI->getEdgeProbability(ChainBB, TBB) << "\n"); +        LLVM_DEBUG(dbgs() << "Reverse order of the two branches: " +                          << getBlockName(ChainBB) << "\n"); +        LLVM_DEBUG(dbgs() << "    Edge probability: " +                          << MBPI->getEdgeProbability(ChainBB, FBB) << " vs " +                          << MBPI->getEdgeProbability(ChainBB, TBB) << "\n");          DebugLoc dl; // FIXME: this is nowhere          TII->removeBranch(*ChainBB);          TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl); @@ -2638,8 +2645,8 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(    if (!shouldTailDuplicate(BB))      return false; -  DEBUG(dbgs() << "Redoing tail duplication for Succ#" -        << BB->getNumber() << "\n"); +  LLVM_DEBUG(dbgs() << "Redoing tail duplication for Succ#" << BB->getNumber() +                    << "\n");    // This has to be a callback because none of it can be done after    // BB is deleted. @@ -2687,8 +2694,8 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(          if (RemBB == PreferredLoopExit)            PreferredLoopExit = nullptr; -        DEBUG(dbgs() << "TailDuplicator deleted block: " -              << getBlockName(RemBB) << "\n"); +        LLVM_DEBUG(dbgs() << "TailDuplicator deleted block: " +                          << getBlockName(RemBB) << "\n");        };    auto RemovalCallbackRef =        function_ref<void(MachineBasicBlock*)>(RemovalCallback); @@ -2752,7 +2759,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {      TailDupSize = TailDupPlacementAggressiveThreshold;    TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); -  // For agressive optimization, we can adjust some thresholds to be less +  // For aggressive optimization, we can adjust some thresholds to be less    // conservative.    if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) {      // At O3 we should be more willing to copy blocks for tail duplication. This @@ -2834,17 +2841,17 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {  namespace { -/// \brief A pass to compute block placement statistics. +/// A pass to compute block placement statistics.  ///  /// A separate pass to compute interesting statistics for evaluating block  /// placement. This is separate from the actual placement pass so that they can  /// be computed in the absence of any placement transformations or when using  /// alternative placement strategies.  class MachineBlockPlacementStats : public MachineFunctionPass { -  /// \brief A handle to the branch probability pass. +  /// A handle to the branch probability pass.    const MachineBranchProbabilityInfo *MBPI; -  /// \brief A handle to the function-wide block frequency pass. +  /// A handle to the function-wide block frequency pass.    
const MachineBlockFrequencyInfo *MBFI;  public: diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 53c0d840ac84..6c92b1d426d6 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -176,11 +176,10 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,      // class given a super-reg class and subreg index.      if (DefMI->getOperand(1).getSubReg())        continue; -    const TargetRegisterClass *RC = MRI->getRegClass(Reg); -    if (!MRI->constrainRegClass(SrcReg, RC)) +    if (!MRI->constrainRegAttrs(SrcReg, Reg))        continue; -    DEBUG(dbgs() << "Coalescing: " << *DefMI); -    DEBUG(dbgs() << "***     to: " << *MI); +    LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI); +    LLVM_DEBUG(dbgs() << "***     to: " << *MI);      // Propagate SrcReg of copies to MI.      MO.setReg(SrcReg);      MRI->clearKillFlags(SrcReg); @@ -315,7 +314,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,    unsigned LookAheadLeft = LookAheadLimit;    while (LookAheadLeft) {      // Skip over dbg_value's. -    while (I != E && I != EE && I->isDebugValue()) +    while (I != E && I != EE && I->isDebugInstr())        ++I;      if (I == EE) { @@ -354,7 +353,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,  bool MachineCSE::isCSECandidate(MachineInstr *MI) {    if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || -      MI->isInlineAsm() || MI->isDebugValue()) +      MI->isInlineAsm() || MI->isDebugInstr())      return false;    // Ignore copies. @@ -446,25 +445,23 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,    // Heuristics #3: If the common subexpression is used by PHIs, do not reuse    // it unless the defined value is already used in the BB of the new use.    bool HasPHI = false; -  SmallPtrSet<MachineBasicBlock*, 4> CSBBs; -  for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { -    HasPHI |= MI.isPHI(); -    CSBBs.insert(MI.getParent()); +  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) { +    HasPHI |= UseMI.isPHI(); +    if (UseMI.getParent() == MI->getParent()) +      return true;    } -  if (!HasPHI) -    return true; -  return CSBBs.count(MI->getParent()); +  return !HasPHI;  }  void MachineCSE::EnterScope(MachineBasicBlock *MBB) { -  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); +  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');    ScopeType *Scope = new ScopeType(VNT);    ScopeMap[MBB] = Scope;  }  void MachineCSE::ExitScope(MachineBasicBlock *MBB) { -  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); +  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');    DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);    assert(SI != ScopeMap.end());    delete SI->second; @@ -548,13 +545,12 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {      // Found a common subexpression, eliminate it.      unsigned CSVN = VNT.lookup(MI);      MachineInstr *CSMI = Exps[CSVN]; -    DEBUG(dbgs() << "Examining: " << *MI); -    DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); +    LLVM_DEBUG(dbgs() << "Examining: " << *MI); +    LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);      // Check if it's profitable to perform this CSE.      
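    // An illustration of the profitability heuristics above; register and
    // block names are hypothetical. For heuristic #3: if %cs is the common
    // subexpression's def and the candidate reuse is in block BB2, reuse is
    // considered profitable when some existing non-debug use of %cs already
    // lives in BB2, since the value must be available there anyway. If the
    // only other uses of %cs are PHIs in different blocks, reuse would
    // extend the live range of %cs across block boundaries, so the CSE is
    // rejected.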
bool DoCSE = true; -    unsigned NumDefs = MI->getDesc().getNumDefs() + -                       MI->getDesc().getNumImplicitDefs(); +    unsigned NumDefs = MI->getNumDefs();      for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {        MachineOperand &MO = MI->getOperand(i); @@ -583,16 +579,17 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {               "Do not CSE physical register defs!");        if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { -        DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); +        LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");          DoCSE = false;          break;        } -      // Don't perform CSE if the result of the old instruction cannot exist -      // within the register class of the new instruction. -      const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg); -      if (!MRI->constrainRegClass(NewReg, OldRC)) { -        DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n"); +      // Don't perform CSE if the result of the new instruction cannot exist +      // within the constraints (register class, bank, or low-level type) of +      // the old instruction. +      if (!MRI->constrainRegAttrs(NewReg, OldReg)) { +        LLVM_DEBUG( +            dbgs() << "*** Not the same register constraints, avoid CSE!\n");          DoCSE = false;          break;        } diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp index 702d21228477..0c6efff7bb40 100644 --- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp @@ -39,8 +39,27 @@ inc_threshold("machine-combiner-inc-threshold", cl::Hidden,                cl::desc("Incremental depth computation will be used for basic "                         "blocks with more instructions."), cl::init(500)); +static cl::opt<bool> dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden, +                                cl::desc("Dump all substituted intrs"), +                                cl::init(false)); + +#ifdef EXPENSIVE_CHECKS +static cl::opt<bool> VerifyPatternOrder( +    "machine-combiner-verify-pattern-order", cl::Hidden, +    cl::desc( +        "Verify that the generated patterns are ordered by increasing latency"), +    cl::init(true)); +#else +static cl::opt<bool> VerifyPatternOrder( +    "machine-combiner-verify-pattern-order", cl::Hidden, +    cl::desc( +        "Verify that the generated patterns are ordered by increasing latency"), +    cl::init(false)); +#endif +  namespace {  class MachineCombiner : public MachineFunctionPass { +  const TargetSubtargetInfo *STI;    const TargetInstrInfo *TII;    const TargetRegisterInfo *TRI;    MCSchedModel SchedModel; @@ -85,6 +104,14 @@ private:                              SmallVectorImpl<MachineInstr *> &DelInstrs);    void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,                       SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC); +  std::pair<unsigned, unsigned> +  getLatenciesForInstrSequences(MachineInstr &MI, +                                SmallVectorImpl<MachineInstr *> &InsInstrs, +                                SmallVectorImpl<MachineInstr *> &DelInstrs, +                                MachineTraceMetrics::Trace BlockTrace); + +  void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root, +                          SmallVector<MachineCombinerPattern, 16> &Patterns);  };  } @@ -140,9 +167,6 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> 
&InsInstrs,    // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth    for (auto *InstrPtr : InsInstrs) { // for each Use      unsigned IDepth = 0; -    DEBUG(dbgs() << "NEW INSTR "; -          InstrPtr->print(dbgs(), TII); -          dbgs() << "\n";);      for (const MachineOperand &MO : InstrPtr->operands()) {        // Check for virtual register operand.        if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) @@ -242,6 +266,29 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {    }  } +/// Estimate the latency of the new and original instruction sequence by summing +/// up the latencies of the inserted and deleted instructions. This assumes +/// that the inserted and deleted instructions are dependent instruction chains, +/// which might not hold in all cases. +std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences( +    MachineInstr &MI, SmallVectorImpl<MachineInstr *> &InsInstrs, +    SmallVectorImpl<MachineInstr *> &DelInstrs, +    MachineTraceMetrics::Trace BlockTrace) { +  assert(!InsInstrs.empty() && "Only support sequences that insert instrs."); +  unsigned NewRootLatency = 0; +  // NewRoot is the last instruction in the \p InsInstrs vector. +  MachineInstr *NewRoot = InsInstrs.back(); +  for (unsigned i = 0; i < InsInstrs.size() - 1; i++) +    NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]); +  NewRootLatency += getLatency(&MI, NewRoot, BlockTrace); + +  unsigned RootLatency = 0; +  for (auto I : DelInstrs) +    RootLatency += TSchedModel.computeInstrLatency(I); + +  return {NewRootLatency, RootLatency}; +} +  /// The DAGCombine code sequence ends in MI (Machine Instruction) Root.  /// The new code sequence ends in MI NewRoot. A necessary condition for the new  /// sequence to replace the old sequence is that it cannot lengthen the critical @@ -257,56 +304,50 @@ bool MachineCombiner::improvesCriticalPathLen(      bool SlackIsAccurate) {    assert(TSchedModel.hasInstrSchedModelOrItineraries() &&           "Missing machine model\n"); -  // NewRoot is the last instruction in the \p InsInstrs vector. -  unsigned NewRootIdx = InsInstrs.size() - 1; -  MachineInstr *NewRoot = InsInstrs[NewRootIdx]; -    // Get depth and latency of NewRoot and Root.    unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);    unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth; -  DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n"; -        dbgs() << " NewRootDepth: " << NewRootDepth << "\n"; -        dbgs() << " RootDepth: " << RootDepth << "\n"); +  LLVM_DEBUG(dbgs() << "  Dependence data for " << *Root << "\tNewRootDepth: " +                    << NewRootDepth << "\tRootDepth: " << RootDepth);    // For a transform such as reassociation, the cost equation is    // conservatively calculated so that we must improve the depth (data    // dependency cycles) in the critical path to proceed with the transform.    // Being conservative also protects against inaccuracies in the underlying    // machine trace metrics and CPU models. -  if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) +  if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) { +    LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth "); +    LLVM_DEBUG(NewRootDepth < RootDepth +                   ? 
dbgs() << "\t  and it does it\n" +                   : dbgs() << "\t  but it does NOT do it\n");      return NewRootDepth < RootDepth; +  }    // A more flexible cost calculation for the critical path includes the slack    // of the original code sequence. This may allow the transform to proceed    // even if the instruction depths (data dependency cycles) become worse.    // Account for the latency of the inserted and deleted instructions by -  // adding up their latencies. This assumes that the inserted and deleted -  // instructions are dependent instruction chains, which might not hold -  // in all cases. -  unsigned NewRootLatency = 0; -  for (unsigned i = 0; i < InsInstrs.size() - 1; i++) -    NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]); -  NewRootLatency += getLatency(Root, NewRoot, BlockTrace); - -  unsigned RootLatency = 0; -  for (auto I : DelInstrs) -    RootLatency += TSchedModel.computeInstrLatency(I); +  unsigned NewRootLatency, RootLatency; +  std::tie(NewRootLatency, RootLatency) = +      getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);    unsigned RootSlack = BlockTrace.getInstrSlack(*Root);    unsigned NewCycleCount = NewRootDepth + NewRootLatency; -  unsigned OldCycleCount = RootDepth + RootLatency + -                           (SlackIsAccurate ? RootSlack : 0); -  DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; -        dbgs() << " RootLatency: " << RootLatency << "\n"; -        dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate=" -               << SlackIsAccurate << "\n"; -        dbgs() << " NewRootDepth + NewRootLatency = " -               << NewCycleCount << "\n"; -        dbgs() << " RootDepth + RootLatency + RootSlack = " -               << OldCycleCount << "\n"; -        ); +  unsigned OldCycleCount = +      RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0); +  LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency +                    << "\tRootLatency: " << RootLatency << "\n\tRootSlack: " +                    << RootSlack << " SlackIsAccurate=" << SlackIsAccurate +                    << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount +                    << "\n\tRootDepth + RootLatency + RootSlack = " +                    << OldCycleCount;); +  LLVM_DEBUG(NewCycleCount <= OldCycleCount +                 ? dbgs() << "\n\t  It IMPROVES PathLen because" +                 : dbgs() << "\n\t  It DOES NOT improve PathLen because"); +  LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount +                    << ", OldCycleCount = " << OldCycleCount << "\n");    return NewCycleCount <= OldCycleCount;  } @@ -352,9 +393,14 @@ bool MachineCombiner::preservesResourceLen(    unsigned ResLenAfterCombine =        BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr); -  DEBUG(dbgs() << "RESOURCE DATA: \n"; -        dbgs() << " resource len before: " << ResLenBeforeCombine -               << " after: " << ResLenAfterCombine << "\n";); +  LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: " +                    << ResLenBeforeCombine +                    << " and after: " << ResLenAfterCombine << "\n";); +  LLVM_DEBUG( +      ResLenAfterCombine <= ResLenBeforeCombine +          ? 
dbgs() << "\t\t  As result it IMPROVES/PRESERVES Resource Length\n" +          : dbgs() << "\t\t  As result it DOES NOT improve/preserve Resource " +                      "Length\n");    return ResLenAfterCombine <= ResLenBeforeCombine;  } @@ -409,6 +455,35 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,    NumInstCombined++;  } +// Check that the difference between original and new latency is decreasing for +// later patterns. This helps to discover sub-optimal pattern orderings. +void MachineCombiner::verifyPatternOrder( +    MachineBasicBlock *MBB, MachineInstr &Root, +    SmallVector<MachineCombinerPattern, 16> &Patterns) { +  long PrevLatencyDiff = std::numeric_limits<long>::max(); +  (void)PrevLatencyDiff; // Variable is used in assert only. +  for (auto P : Patterns) { +    SmallVector<MachineInstr *, 16> InsInstrs; +    SmallVector<MachineInstr *, 16> DelInstrs; +    DenseMap<unsigned, unsigned> InstrIdxForVirtReg; +    TII->genAlternativeCodeSequence(Root, P, InsInstrs, DelInstrs, +                                    InstrIdxForVirtReg); +    // Found pattern, but did not generate alternative sequence. +    // This can happen e.g. when an immediate could not be materialized +    // in a single instruction. +    if (InsInstrs.empty() || !TSchedModel.hasInstrSchedModelOrItineraries()) +      continue; + +    unsigned NewRootLatency, RootLatency; +    std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences( +        Root, InsInstrs, DelInstrs, MinInstr->getTrace(MBB)); +    long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency); +    assert(CurrentLatencyDiff <= PrevLatencyDiff && +           "Current pattern is better than previous pattern."); +    PrevLatencyDiff = CurrentLatencyDiff; +  } +} +  /// Substitute a slow code sequence with a faster one by  /// evaluating instruction combining pattern.  /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction @@ -418,7 +493,7 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,  /// sequence is shorter.  bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {    bool Changed = false; -  DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); +  LLVM_DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");    bool IncrementalUpdate = false;    auto BlockIter = MBB->begin(); @@ -433,8 +508,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {    while (BlockIter != MBB->end()) {      auto &MI = *BlockIter++; - -    DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);      SmallVector<MachineCombinerPattern, 16> Patterns;      // The motivating example is:      // @@ -459,11 +532,16 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {      // The algorithm does not try to evaluate all patterns and pick the best.      // This is only an artificial restriction though. In practice there is      // mostly one pattern, and getMachineCombinerPatterns() can order patterns -    // based on an internal cost heuristic. +    // based on an internal cost heuristic. If +    // machine-combiner-verify-pattern-order is enabled, all patterns are +    // checked to ensure later patterns do not provide better latency savings.      
if (!TII->getMachineCombinerPatterns(MI, Patterns))        continue; +    if (VerifyPatternOrder) +      verifyPatternOrder(MBB, MI, Patterns); +      for (auto P : Patterns) {        SmallVector<MachineInstr *, 16> InsInstrs;        SmallVector<MachineInstr *, 16> DelInstrs; @@ -478,6 +556,19 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {        if (!NewInstCount)          continue; +      LLVM_DEBUG(if (dump_intrs) { +        dbgs() << "\tFor the Pattern (" << (int)P << ") these instructions could be removed\n"; +        for (auto const *InstrPtr : DelInstrs) { +          dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": "; +          InstrPtr->print(dbgs(), false, false, false, TII); +        } +        dbgs() << "\tThese instructions could replace the removed ones\n"; +        for (auto const *InstrPtr : InsInstrs) { +          dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": "; +          InstrPtr->print(dbgs(), false, false, false, TII); +        } +      }); +        bool SubstituteAlways = false;        if (ML && TII->isThroughputPattern(P))          SubstituteAlways = true; @@ -539,20 +630,22 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {  }  bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { -  const TargetSubtargetInfo &STI = MF.getSubtarget(); -  TII = STI.getInstrInfo(); -  TRI = STI.getRegisterInfo(); -  SchedModel = STI.getSchedModel(); -  TSchedModel.init(SchedModel, &STI, TII); +  STI = &MF.getSubtarget(); +  TII = STI->getInstrInfo(); +  TRI = STI->getRegisterInfo(); +  SchedModel = STI->getSchedModel(); +  TSchedModel.init(STI);    MRI = &MF.getRegInfo();    MLI = &getAnalysis<MachineLoopInfo>();    Traces = &getAnalysis<MachineTraceMetrics>();    MinInstr = nullptr;    OptSize = MF.getFunction().optForSize(); -  DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');    if (!TII->useMachineCombiner()) { -    DEBUG(dbgs() << "  Skipping pass: Target does not support machine combiner\n"); +    LLVM_DEBUG( +        dbgs() +        << "  Skipping pass: Target does not support machine combiner\n");      return false;    } diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index fcec05adc732..3bf8147a06c3 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -9,6 +9,35 @@  //  // This is an extremely simple MachineInstr-level copy propagation pass.  // +// This pass forwards the source of COPYs to the users of their destinations +// when doing so is legal.  For example: +// +//   %reg1 = COPY %reg0 +//   ... +//   ... = OP %reg1 +// +// If +//   - %reg0 has not been clobbered by the time of the use of %reg1 +//   - the register class constraints are satisfied +//   - the COPY def is the only value that reaches OP +// then this pass replaces the above with: +// +//   %reg1 = COPY %reg0 +//   ... +//   ... = OP %reg0 +// +// This pass also removes some redundant COPYs.  For example: +// +//    %R1 = COPY %R0 +//    ... // No clobber of %R1 +//    %R0 = COPY %R1 <<< Removed +// +// or +// +//    %R1 = COPY %R0 +//    ... 
// No clobber of %R0 +//    %R1 = COPY %R0 <<< Removed +//  //===----------------------------------------------------------------------===//  #include "llvm/ADT/DenseMap.h" @@ -23,11 +52,13 @@  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/MC/MCRegisterInfo.h"  #include "llvm/Pass.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/DebugCounter.h"  #include "llvm/Support/raw_ostream.h"  #include <cassert>  #include <iterator> @@ -37,6 +68,9 @@ using namespace llvm;  #define DEBUG_TYPE "machine-cp"  STATISTIC(NumDeletes, "Number of dead copies deleted"); +STATISTIC(NumCopyForwards, "Number of copy uses forwarded"); +DEBUG_COUNTER(FwdCounter, "machine-cp-fwd", +              "Controls which register COPYs are forwarded");  namespace { @@ -73,6 +107,10 @@ using Reg2MIMap = DenseMap<unsigned, MachineInstr *>;      void ReadRegister(unsigned Reg);      void CopyPropagateBlock(MachineBasicBlock &MBB);      bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); +    void forwardUses(MachineInstr &MI); +    bool isForwardableRegClassCopy(const MachineInstr &Copy, +                                   const MachineInstr &UseI, unsigned UseIdx); +    bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);      /// Candidates for deletion.      SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; @@ -143,7 +181,8 @@ void MachineCopyPropagation::ReadRegister(unsigned Reg) {    for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {      Reg2MIMap::iterator CI = CopyMap.find(*AI);      if (CI != CopyMap.end()) { -      DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump()); +      LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; +                 CI->second->dump());        MaybeDeadCopies.remove(CI->second);      }    } @@ -191,7 +230,7 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,    if (!isNopCopy(PrevCopy, Src, Def, TRI))      return false; -  DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump()); +  LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump());    // Copy was redundantly redefining either Src or Def. Remove earlier kill    // flags between Copy and PrevCopy because the value will be reused now. @@ -208,14 +247,163 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,    return true;  } +/// Decide whether we should forward the source of \param Copy to its use in +/// \param UseI based on the physical register class constraints of the opcode +/// and avoiding introducing more cross-class COPYs. +bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, +                                                       const MachineInstr &UseI, +                                                       unsigned UseIdx) { + +  unsigned CopySrcReg = Copy.getOperand(1).getReg(); + +  // If the new register meets the opcode register constraints, then allow +  // forwarding. 
+  if (const TargetRegisterClass *URC =
+          UseI.getRegClassConstraint(UseIdx, TII, TRI))
+    return URC->contains(CopySrcReg);
+
+  if (!UseI.isCopy())
+    return false;
+
+  /// COPYs don't have register class constraints, so if the user instruction
+  /// is a COPY, we just try to avoid introducing additional cross-class
+  /// COPYs.  For example:
+  ///
+  ///   RegClassA = COPY RegClassB  // Copy parameter
+  ///   ...
+  ///   RegClassB = COPY RegClassA  // UseI parameter
+  ///
+  /// which after forwarding becomes
+  ///
+  ///   RegClassA = COPY RegClassB
+  ///   ...
+  ///   RegClassB = COPY RegClassB
+  ///
+  /// so we have reduced the number of cross-class COPYs and potentially
+  /// introduced a nop COPY that can be removed.
+  const TargetRegisterClass *UseDstRC =
+      TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
+
+  const TargetRegisterClass *SuperRC = UseDstRC;
+  for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
+       SuperRC; SuperRC = *SuperRCI++)
+    if (SuperRC->contains(CopySrcReg))
+      return true;
+
+  return false;
+}
+
+/// Check that \p MI does not have implicit uses that overlap with its \p Use
+/// operand (the register being replaced), since these can sometimes be
+/// implicitly tied to other operands.  For example, on AMDGPU:
+///
+/// V_MOVRELS_B32_e32 %VGPR2, %M0<imp-use>, %EXEC<imp-use>, %VGPR2_VGPR3_VGPR4_VGPR5<imp-use>
+///
+/// the %VGPR2 is implicitly tied to the larger reg operand, but we have no
+/// way of knowing we need to update the latter when updating the former.
+bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
+                                                const MachineOperand &Use) {
+  for (const MachineOperand &MIUse : MI.uses())
+    if (&MIUse != &Use && MIUse.isReg() && MIUse.isImplicit() &&
+        MIUse.isUse() && TRI->regsOverlap(Use.getReg(), MIUse.getReg()))
+      return true;
+
+  return false;
+}
+
+/// Look for available copies whose destination register is used by \p MI and
+/// replace the use in \p MI with the copy's source register.
+void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
+  if (AvailCopyMap.empty())
+    return;
+
+  // Look for non-tied explicit vreg uses that have an active COPY
+  // instruction that defines the physical register allocated to them.
+  // Replace the vreg with the source of the active COPY.
+  for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx < OpEnd;
+       ++OpIdx) {
+    MachineOperand &MOUse = MI.getOperand(OpIdx);
+    // Don't forward into undef use operands since doing so can cause problems
+    // with the machine verifier, since it doesn't treat undef reads as reads,
+    // so we can end up with a live range that ends on an undef read, leading to
+    // an error that the live range doesn't end on a read of the live range
+    // register.
+    if (!MOUse.isReg() || MOUse.isTied() || MOUse.isUndef() || MOUse.isDef() ||
+        MOUse.isImplicit())
+      continue;
+
+    if (!MOUse.getReg())
+      continue;
+
+    // Check that the register is marked 'renamable' so we know it is safe to
+    // rename it without violating any constraints that aren't expressed in the
+    // IR (e.g. ABI or opcode requirements).
+    if (!MOUse.isRenamable()) +      continue; + +    auto CI = AvailCopyMap.find(MOUse.getReg()); +    if (CI == AvailCopyMap.end()) +      continue; + +    MachineInstr &Copy = *CI->second; +    unsigned CopyDstReg = Copy.getOperand(0).getReg(); +    const MachineOperand &CopySrc = Copy.getOperand(1); +    unsigned CopySrcReg = CopySrc.getReg(); + +    // FIXME: Don't handle partial uses of wider COPYs yet. +    if (MOUse.getReg() != CopyDstReg) { +      LLVM_DEBUG( +          dbgs() << "MCP: FIXME! Not forwarding COPY to sub-register use:\n  " +                 << MI); +      continue; +    } + +    // Don't forward COPYs of reserved regs unless they are constant. +    if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg)) +      continue; + +    if (!isForwardableRegClassCopy(Copy, MI, OpIdx)) +      continue; + +    if (hasImplicitOverlap(MI, MOUse)) +      continue; + +    if (!DebugCounter::shouldExecute(FwdCounter)) { +      LLVM_DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n  " +                        << MI); +      continue; +    } + +    LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI) +                      << "\n     with " << printReg(CopySrcReg, TRI) +                      << "\n     in " << MI << "     from " << Copy); + +    MOUse.setReg(CopySrcReg); +    if (!CopySrc.isRenamable()) +      MOUse.setIsRenamable(false); + +    LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n"); + +    // Clear kill markers that may have been invalidated. +    for (MachineInstr &KMI : +         make_range(Copy.getIterator(), std::next(MI.getIterator()))) +      KMI.clearRegisterKills(CopySrcReg, TRI); + +    ++NumCopyForwards; +    Changed = true; +  } +} +  void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { -  DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n"); +  LLVM_DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {      MachineInstr *MI = &*I;      ++I; -    if (MI->isCopy()) { +    // Analyze copies (which don't overlap themselves). +    if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(), +                                          MI->getOperand(1).getReg())) {        unsigned Def = MI->getOperand(0).getReg();        unsigned Src = MI->getOperand(1).getReg(); @@ -241,6 +429,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {        if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))          continue; +      forwardUses(*MI); + +      // Src may have been changed by forwardUses() +      Src = MI->getOperand(1).getReg(); +        // If Src is defined by a previous copy, the previous copy cannot be        // eliminated.        ReadRegister(Src); @@ -253,7 +446,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {          ReadRegister(Reg);        } -      DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); +      LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());        // Copy is now a candidate for deletion.        if (!MRI->isReserved(Def)) @@ -292,6 +485,20 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {        continue;      } +    // Clobber any earlyclobber regs first. 
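    // (Background, illustrative: an earlyclobber def is written before the
    // instruction's uses are read, as with the inline-asm constraint "=&r":
    //   asm("..." : "=&r"(Out) : "r"(In));
    // Clobbering such defs before calling forwardUses() below ensures that
    // no COPY whose source is overwritten early is forwarded into this
    // instruction's own uses.)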
+    for (const MachineOperand &MO : MI->operands()) +      if (MO.isReg() && MO.isEarlyClobber()) { +        unsigned Reg = MO.getReg(); +        // If we have a tied earlyclobber, that means it is also read by this +        // instruction, so we need to make sure we don't remove it as dead +        // later. +        if (MO.isTied()) +          ReadRegister(Reg); +        ClobberRegister(Reg); +      } + +    forwardUses(*MI); +      // Not a copy.      SmallVector<unsigned, 2> Defs;      const MachineOperand *RegMask = nullptr; @@ -307,10 +514,10 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {        assert(!TargetRegisterInfo::isVirtualRegister(Reg) &&               "MachineCopyPropagation should be run after register allocation!"); -      if (MO.isDef()) { +      if (MO.isDef() && !MO.isEarlyClobber()) {          Defs.push_back(Reg);          continue; -      } else if (MO.readsReg()) +      } else if (!MO.isDebug() && MO.readsReg())          ReadRegister(Reg);      } @@ -331,8 +538,8 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {            continue;          } -        DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: "; -              MaybeDead->dump()); +        LLVM_DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: "; +                   MaybeDead->dump());          // erase() will return the next valid iterator pointing to the next          // element after the erased one. @@ -364,6 +571,8 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {    // since we don't want to trust live-in lists.    if (MBB.succ_empty()) {      for (MachineInstr *MaybeDead : MaybeDeadCopies) { +      LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: "; +                 MaybeDead->dump());        assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));        MaybeDead->eraseFromParent();        Changed = true; diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp index 517ac29b6450..6b2802626456 100644 --- a/contrib/llvm/lib/CodeGen/MachineDominators.cpp +++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp @@ -65,8 +65,21 @@ void MachineDominatorTree::releaseMemory() {  }  void MachineDominatorTree::verifyAnalysis() const { -  if (DT && VerifyMachineDomInfo) -    verifyDomTree(); +  if (DT && VerifyMachineDomInfo) { +    MachineFunction &F = *getRoot()->getParent(); + +    DomTreeBase<MachineBasicBlock> OtherDT; +    OtherDT.recalculate(F); +    if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() || +        DT->compare(OtherDT)) { +      errs() << "MachineDominatorTree for function " << F.getName() +            << " is not up to date!\nComputed:\n"; +      DT->print(errs()); +      errs() << "\nActual:\n"; +      OtherDT.print(errs()); +      abort(); +    } +  }  }  void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { @@ -138,21 +151,3 @@ void MachineDominatorTree::applySplitCriticalEdges() const {    NewBBs.clear();    CriticalEdgesToSplit.clear();  } - -void MachineDominatorTree::verifyDomTree() const { -  if (!DT) -    return; -  MachineFunction &F = *getRoot()->getParent(); - -  DomTreeBase<MachineBasicBlock> OtherDT; -  OtherDT.recalculate(F); -  if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() || -      DT->compare(OtherDT)) { -    errs() << "MachineDominatorTree for function " << F.getName() -           << " is not up to date!\nComputed:\n"; -    
DT->print(errs());
-    errs() << "\nActual:\n";
-    OtherDT.print(errs());
-    abort();
-  }
-}
diff --git a/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp
index 2aa9d6b816c8..0b316871dbdf 100644
--- a/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
@@ -40,9 +41,9 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
                                            unsigned StackAlign) {
   if (!ShouldClamp || Align <= StackAlign)
     return Align;
-  DEBUG(dbgs() << "Warning: requested alignment " << Align
-               << " exceeds the stack alignment " << StackAlign
-               << " when stack realignment is off" << '\n');
+  LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Align
+                    << " exceeds the stack alignment " << StackAlign
+                    << " when stack realignment is off" << '\n');
   return StackAlign;
 }
@@ -217,7 +218,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
     OS << "  fi#" << (int)(i-NumFixedObjects) << ": ";
     if (SO.StackID != 0)
-      OS << "id=" << SO.StackID << ' ';
+      OS << "id=" << static_cast<unsigned>(SO.StackID) << ' ';
     if (SO.Size == ~0ULL) {
       OS << "dead\n";
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index bc8eb1429d92..dd668bcf6193 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -37,7 +37,9 @@
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -174,6 +176,11 @@ void MachineFunction::init() {
     WinEHInfo = new (Allocator) WinEHFuncInfo();
   }
 
+  if (isScopedEHPersonality(classifyEHPersonality(
+          F.hasPersonalityFn() ? F.getPersonalityFn() : nullptr))) {
+    WasmEHInfo = new (Allocator) WasmEHFuncInfo();
+  }
+
   assert(Target.isCompatibleDataLayout(getDataLayout()) &&
          "Can't create a MachineFunction using a Module with a "
         "Target-incompatible DataLayout attached\n");
@@ -195,6 +202,7 @@ void MachineFunction::clear() {
   // Do call MachineBasicBlock destructors; they contain std::vectors.
for (iterator I = begin(), E = end(); I != E; I = BasicBlocks.erase(I))      I->Insts.clearAndLeakNodesUnsafely(); +  MBBNumbering.clear();    InstructionRecycler.clear(Allocator);    OperandRecycler.clear(Allocator); @@ -478,6 +486,14 @@ const char *MachineFunction::createExternalSymbolName(StringRef Name) {    return Dest;  } +uint32_t *MachineFunction::allocateRegMask() { +  unsigned NumRegs = getSubtarget().getRegisterInfo()->getNumRegs(); +  unsigned Size = MachineOperand::getRegMaskSize(NumRegs); +  uint32_t *Mask = Allocator.Allocate<uint32_t>(Size); +  memset(Mask, 0, Size * sizeof(Mask[0])); +  return Mask; +} +  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)  LLVM_DUMP_METHOD void MachineFunction::dump() const {    print(dbgs()); @@ -522,7 +538,8 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {    MST.incorporateFunction(getFunction());    for (const auto &BB : *this) {      OS << '\n'; -    BB.print(OS, MST, Indexes); +    // If we print the whole function, print it at its most verbose level. +    BB.print(OS, MST, Indexes, /*IsStandalone=*/true);    }    OS << "\n# End machine code for function " << getName() << ".\n\n"; diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp index 5ffe33006131..67ac95740e3e 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -24,7 +24,6 @@  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineModuleInfo.h"  #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/StackProtector.h"  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Function.h" @@ -85,7 +84,6 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {    AU.addPreserved<MemoryDependenceWrapperPass>();    AU.addPreserved<ScalarEvolutionWrapperPass>();    AU.addPreserved<SCEVAAWrapperPass>(); -  AU.addPreserved<StackProtector>();    FunctionPass::getAnalysisUsage(AU);  } diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 14655c6eb700..96fcfdb72ad7 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -37,6 +37,7 @@  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DebugInfoMetadata.h"  #include "llvm/IR/DebugLoc.h" @@ -74,6 +75,29 @@  using namespace llvm; +static const MachineFunction *getMFIfAvailable(const MachineInstr &MI) { +  if (const MachineBasicBlock *MBB = MI.getParent()) +    if (const MachineFunction *MF = MBB->getParent()) +      return MF; +  return nullptr; +} + +// Try to crawl up to the machine function and get TRI and IntrinsicInfo from +// it. 
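+// A typical call pattern (illustrative sketch, not part of this patch):
+// callers default everything to null and only receive non-null pointers when
+// MI is parented to a function:
+//
+//   const TargetRegisterInfo *TRI = nullptr;
+//   const MachineRegisterInfo *MRI = nullptr;
+//   const TargetIntrinsicInfo *IntrinsicInfo = nullptr;
+//   const TargetInstrInfo *TII = nullptr;
+//   tryToGetTargetInfo(MI, TRI, MRI, IntrinsicInfo, TII);
+//   if (TRI) { /* print with target-aware register names */ }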
+static void tryToGetTargetInfo(const MachineInstr &MI,
+                               const TargetRegisterInfo *&TRI,
+                               const MachineRegisterInfo *&MRI,
+                               const TargetIntrinsicInfo *&IntrinsicInfo,
+                               const TargetInstrInfo *&TII) {
+
+  if (const MachineFunction *MF = getMFIfAvailable(MI)) {
+    TRI = MF->getSubtarget().getRegisterInfo();
+    MRI = &MF->getRegInfo();
+    IntrinsicInfo = MF->getTarget().getIntrinsicInfo();
+    TII = MF->getSubtarget().getInstrInfo();
+  }
+}
+
 void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
   if (MCID->ImplicitDefs)
     for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs;
@@ -358,6 +382,12 @@ MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
   return std::make_pair(MemBegin, CombinedNumMemRefs);
 }
 
+uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
+  // For now, just return the union of the flags. If the flags get more
+  // complicated over time, we might need more logic here.
+  return getFlags() | Other.getFlags();
+}
+
 bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
   assert(!isBundledWithPred() && "Must be called on bundle header");
   for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
@@ -437,8 +467,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
         return false;
     }
   }
-  // If DebugLoc does not match then two dbg.values are not identical.
-  if (isDebugValue())
+  // If DebugLoc does not match then two debug instructions are not identical.
+  if (isDebugInstr())
     if (getDebugLoc() && Other.getDebugLoc() &&
         getDebugLoc() != Other.getDebugLoc())
       return false;
@@ -489,21 +519,39 @@ void MachineInstr::eraseFromBundle() {
   getParent()->erase_instr(this);
 }
 
-/// getNumExplicitOperands - Returns the number of non-implicit operands.
-///  unsigned MachineInstr::getNumExplicitOperands() const {    unsigned NumOperands = MCID->getNumOperands();    if (!MCID->isVariadic())      return NumOperands; -  for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) { -    const MachineOperand &MO = getOperand(i); -    if (!MO.isReg() || !MO.isImplicit()) -      NumOperands++; +  for (unsigned I = NumOperands, E = getNumOperands(); I != E; ++I) { +    const MachineOperand &MO = getOperand(I); +    // The operands must always be in the following order: +    // - explicit reg defs, +    // - other explicit operands (reg uses, immediates, etc.), +    // - implicit reg defs +    // - implicit reg uses +    if (MO.isReg() && MO.isImplicit()) +      break; +    ++NumOperands;    }    return NumOperands;  } +unsigned MachineInstr::getNumExplicitDefs() const { +  unsigned NumDefs = MCID->getNumDefs(); +  if (!MCID->isVariadic()) +    return NumDefs; + +  for (unsigned I = NumDefs, E = getNumOperands(); I != E; ++I) { +    const MachineOperand &MO = getOperand(I); +    if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) +      break; +    ++NumDefs; +  } +  return NumDefs; +} +  void MachineInstr::bundleWithPred() {    assert(!isBundledWithPred() && "MI is already bundled with its predecessor");    setFlag(BundledPred); @@ -583,6 +631,11 @@ int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,    return -1;  } +const DILabel *MachineInstr::getDebugLabel() const { +  assert(isDebugLabel() && "not a DBG_LABEL"); +  return cast<DILabel>(getOperand(0).getMetadata()); +} +  const DILocalVariable *MachineInstr::getDebugVariable() const {    assert(isDebugValue() && "not a DBG_VALUE");    return cast<DILocalVariable>(getOperand(2).getMetadata()); @@ -905,8 +958,7 @@ void MachineInstr::clearKillInfo() {    }  } -void MachineInstr::substituteRegister(unsigned FromReg, -                                      unsigned ToReg, +void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg,                                        unsigned SubIdx,                                        const TargetRegisterInfo &RegInfo) {    if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { @@ -941,7 +993,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {      return false;    } -  if (isPosition() || isDebugValue() || isTerminator() || +  if (isPosition() || isDebugInstr() || isTerminator() ||        hasUnmodeledSideEffects())      return false; @@ -1195,8 +1247,12 @@ LLT MachineInstr::getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes,    if (PrintedTypes[OpInfo.getGenericTypeIndex()])      return LLT{}; -  PrintedTypes.set(OpInfo.getGenericTypeIndex()); -  return MRI.getType(Op.getReg()); +  LLT TypeToPrint = MRI.getType(Op.getReg()); +  // Don't mark the type index printed if it wasn't actually printed: maybe +  // another operand with the same type index has an actual type attached: +  if (TypeToPrint.isValid()) +    PrintedTypes.set(OpInfo.getGenericTypeIndex()); +  return TypeToPrint;  }  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1206,39 +1262,36 @@ LLVM_DUMP_METHOD void MachineInstr::dump() const {  }  #endif -void MachineInstr::print(raw_ostream &OS, bool SkipOpers, bool SkipDebugLoc, +void MachineInstr::print(raw_ostream &OS, bool IsStandalone, bool SkipOpers, +                         bool SkipDebugLoc, bool AddNewLine,                           const TargetInstrInfo *TII) const {    const Module *M = nullptr; -  if (const MachineBasicBlock *MBB = getParent()) -    if (const MachineFunction 
*MF = MBB->getParent()) -      M = MF->getFunction().getParent(); +  const Function *F = nullptr; +  if (const MachineFunction *MF = getMFIfAvailable(*this)) { +    F = &MF->getFunction(); +    M = F->getParent(); +    if (!TII) +      TII = MF->getSubtarget().getInstrInfo(); +  }    ModuleSlotTracker MST(M); -  print(OS, MST, SkipOpers, SkipDebugLoc, TII); +  if (F) +    MST.incorporateFunction(*F); +  print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, TII);  }  void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, -                         bool SkipOpers, bool SkipDebugLoc, -                         const TargetInstrInfo *TII) const { +                         bool IsStandalone, bool SkipOpers, bool SkipDebugLoc, +                         bool AddNewLine, const TargetInstrInfo *TII) const {    // We can be a bit tidier if we know the MachineFunction.    const MachineFunction *MF = nullptr;    const TargetRegisterInfo *TRI = nullptr;    const MachineRegisterInfo *MRI = nullptr;    const TargetIntrinsicInfo *IntrinsicInfo = nullptr; +  tryToGetTargetInfo(*this, TRI, MRI, IntrinsicInfo, TII); -  if (const MachineBasicBlock *MBB = getParent()) { -    MF = MBB->getParent(); -    if (MF) { -      MRI = &MF->getRegInfo(); -      TRI = MF->getSubtarget().getRegisterInfo(); -      if (!TII) -        TII = MF->getSubtarget().getInstrInfo(); -      IntrinsicInfo = MF->getTarget().getIntrinsicInfo(); -    } -  } - -  // Save a list of virtual registers. -  SmallVector<unsigned, 8> VirtRegs; +  if (isCFIInstruction()) +    assert(getNumOperands() == 1 && "Expected 1 operand in CFI instruction");    SmallBitVector PrintedTypes(8);    bool ShouldPrintRegisterTies = hasComplexRegisterTies(); @@ -1250,26 +1303,47 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,        return findTiedOperandIdx(OpIdx);      return 0U;    }; +  unsigned StartOp = 0; +  unsigned e = getNumOperands(); +    // Print explicitly defined operands on the left of an assignment syntax. -  unsigned StartOp = 0, e = getNumOperands(); -  for (; StartOp < e && getOperand(StartOp).isReg() && -         getOperand(StartOp).isDef() && !getOperand(StartOp).isImplicit(); -       ++StartOp) { +  while (StartOp < e) { +    const MachineOperand &MO = getOperand(StartOp); +    if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) +      break; +      if (StartOp != 0)        OS << ", "; +      LLT TypeToPrint = MRI ? 
getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{};      unsigned TiedOperandIdx = getTiedOperandIdx(StartOp); -    getOperand(StartOp).print(OS, MST, TypeToPrint, /*PrintDef=*/false, -                              ShouldPrintRegisterTies, TiedOperandIdx, TRI, -                              IntrinsicInfo); -    unsigned Reg = getOperand(StartOp).getReg(); -    if (TargetRegisterInfo::isVirtualRegister(Reg)) -      VirtRegs.push_back(Reg); +    MO.print(OS, MST, TypeToPrint, /*PrintDef=*/false, IsStandalone, +             ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); +    ++StartOp;    }    if (StartOp != 0)      OS << " = "; +  if (getFlag(MachineInstr::FrameSetup)) +    OS << "frame-setup "; +  if (getFlag(MachineInstr::FrameDestroy)) +    OS << "frame-destroy "; +  if (getFlag(MachineInstr::FmNoNans)) +    OS << "nnan "; +  if (getFlag(MachineInstr::FmNoInfs)) +    OS << "ninf "; +  if (getFlag(MachineInstr::FmNsz)) +    OS << "nsz "; +  if (getFlag(MachineInstr::FmArcp)) +    OS << "arcp "; +  if (getFlag(MachineInstr::FmContract)) +    OS << "contract "; +  if (getFlag(MachineInstr::FmAfn)) +    OS << "afn "; +  if (getFlag(MachineInstr::FmReassoc)) +    OS << "reassoc "; +    // Print the opcode name.    if (TII)      OS << TII->getName(getOpcode()); @@ -1290,7 +1364,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,      const unsigned OpIdx = InlineAsm::MIOp_AsmString;      LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{};      unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx); -    getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, +    getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,                              ShouldPrintRegisterTies, TiedOperandIdx, TRI,                              IntrinsicInfo); @@ -1318,18 +1392,9 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,    for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {      const MachineOperand &MO = getOperand(i); -    if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) -      VirtRegs.push_back(MO.getReg()); -      if (FirstOp) FirstOp = false; else OS << ",";      OS << " "; -    if (i < getDesc().NumOperands) { -      const MCOperandInfo &MCOI = getDesc().OpInfo[i]; -      if (MCOI.isPredicate()) -        OS << "pred:"; -      if (MCOI.isOptionalDef()) -        OS << "opt:"; -    } +      if (isDebugValue() && MO.isMetadata()) {        // Pretty print DBG_VALUE instructions.        auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata()); @@ -1338,12 +1403,20 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,        else {          LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};          unsigned TiedOperandIdx = getTiedOperandIdx(i); -        MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, +        MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, +                 ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); +      } +    } else if (isDebugLabel() && MO.isMetadata()) { +      // Pretty print DBG_LABEL instructions. +      auto *DIL = dyn_cast<DILabel>(MO.getMetadata()); +      if (DIL && !DIL->getName().empty()) +        OS << "\"" << DIL->getName() << '\"'; +      else { +        LLT TypeToPrint = MRI ? 
getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; +        unsigned TiedOperandIdx = getTiedOperandIdx(i); +        MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,                   ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);        } -    } else if (TRI && (isInsertSubreg() || isRegSequence() || -                       (isSubregToReg() && i == 3)) && MO.isImm()) { -      OS << TRI->getSubRegIndexName(MO.getImm());      } else if (i == AsmDescOp && MO.isImm()) {        // Pretty print the inline asm operand descriptor.        OS << '$' << AsmOpCount++; @@ -1406,77 +1479,66 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,        LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};        unsigned TiedOperandIdx = getTiedOperandIdx(i);        if (MO.isImm() && isOperandSubregIdx(i)) -        MachineOperand::printSubregIdx(OS, MO.getImm(), TRI); +        MachineOperand::printSubRegIdx(OS, MO.getImm(), TRI);        else -        MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, +        MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,                   ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);      }    } -  bool HaveSemi = false; -  const unsigned PrintableFlags = FrameSetup | FrameDestroy; -  if (Flags & PrintableFlags) { -    if (!HaveSemi) { -      OS << ";"; -      HaveSemi = true; +  if (!SkipDebugLoc) { +    if (const DebugLoc &DL = getDebugLoc()) { +      if (!FirstOp) +        OS << ','; +      OS << " debug-location "; +      DL->printAsOperand(OS, MST);      } -    OS << " flags: "; - -    if (Flags & FrameSetup) -      OS << "FrameSetup"; - -    if (Flags & FrameDestroy) -      OS << "FrameDestroy";    }    if (!memoperands_empty()) { -    if (!HaveSemi) { -      OS << ";"; -      HaveSemi = true; +    SmallVector<StringRef, 0> SSNs; +    const LLVMContext *Context = nullptr; +    std::unique_ptr<LLVMContext> CtxPtr; +    const MachineFrameInfo *MFI = nullptr; +    if (const MachineFunction *MF = getMFIfAvailable(*this)) { +      MFI = &MF->getFrameInfo(); +      Context = &MF->getFunction().getContext(); +    } else { +      CtxPtr = llvm::make_unique<LLVMContext>(); +      Context = CtxPtr.get();      } -    OS << " mem:"; -    for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); -         i != e; ++i) { -      (*i)->print(OS, MST); -      if (std::next(i) != e) -        OS << " "; +    OS << " :: "; +    bool NeedComma = false; +    for (const MachineMemOperand *Op : memoperands()) { +      if (NeedComma) +        OS << ", "; +      Op->print(OS, MST, SSNs, *Context, MFI, TII); +      NeedComma = true;      }    } -  // Print the regclass of any virtual registers encountered. -  if (MRI && !VirtRegs.empty()) { +  if (SkipDebugLoc) +    return; + +  bool HaveSemi = false; + +  // Print debug location information. +  if (const DebugLoc &DL = getDebugLoc()) {      if (!HaveSemi) { -      OS << ";"; +      OS << ';';        HaveSemi = true;      } -    for (unsigned i = 0; i != VirtRegs.size(); ++i) { -      const RegClassOrRegBank &RC = MRI->getRegClassOrRegBank(VirtRegs[i]); -      if (!RC) -        continue; -      // Generic virtual registers do not have register classes. 
-      if (RC.is<const RegisterBank *>()) -        OS << " " << RC.get<const RegisterBank *>()->getName(); -      else -        OS << " " -           << TRI->getRegClassName(RC.get<const TargetRegisterClass *>()); -      OS << ':' << printReg(VirtRegs[i]); -      for (unsigned j = i+1; j != VirtRegs.size();) { -        if (MRI->getRegClassOrRegBank(VirtRegs[j]) != RC) { -          ++j; -          continue; -        } -        if (VirtRegs[i] != VirtRegs[j]) -          OS << "," << printReg(VirtRegs[j]); -        VirtRegs.erase(VirtRegs.begin()+j); -      } -    } +    OS << ' '; +    DL.print(OS);    } -  // Print debug location information. +  // Print extra comments for DEBUG_VALUE.    if (isDebugValue() && getOperand(e - 2).isMetadata()) { -    if (!HaveSemi) +    if (!HaveSemi) {        OS << ";"; +      HaveSemi = true; +    }      auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata());      OS << " line no:" <<  DV->getLine();      if (auto *InlinedAt = debugLoc->getInlinedAt()) { @@ -1489,16 +1551,11 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,      }      if (isIndirectDebugValue())        OS << " indirect"; -  } else if (SkipDebugLoc) { -    return; -  } else if (debugLoc && MF) { -    if (!HaveSemi) -      OS << ";"; -    OS << " dbg:"; -    debugLoc.print(OS);    } +  // TODO: DBG_LABEL -  OS << '\n'; +  if (AddNewLine) +    OS << '\n';  }  bool MachineInstr::addRegisterKilled(unsigned IncomingReg, @@ -1737,33 +1794,55 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,    assert(cast<DIExpression>(Expr)->isValid() && "not an expression");    assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&           "Expected inlined-at fields to agree"); +  auto MIB = BuildMI(MF, DL, MCID).addReg(Reg, RegState::Debug);    if (IsIndirect) -    return BuildMI(MF, DL, MCID) -        .addReg(Reg, RegState::Debug) -        .addImm(0U) -        .addMetadata(Variable) -        .addMetadata(Expr); +    MIB.addImm(0U);    else -    return BuildMI(MF, DL, MCID) -        .addReg(Reg, RegState::Debug) -        .addReg(0U, RegState::Debug) -        .addMetadata(Variable) -        .addMetadata(Expr); +    MIB.addReg(0U, RegState::Debug); +  return MIB.addMetadata(Variable).addMetadata(Expr);  } +MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, +                                  const MCInstrDesc &MCID, bool IsIndirect, +                                  MachineOperand &MO, const MDNode *Variable, +                                  const MDNode *Expr) { +  assert(isa<DILocalVariable>(Variable) && "not a variable"); +  assert(cast<DIExpression>(Expr)->isValid() && "not an expression"); +  assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && +         "Expected inlined-at fields to agree"); +  if (MO.isReg()) +    return BuildMI(MF, DL, MCID, IsIndirect, MO.getReg(), Variable, Expr); + +  auto MIB = BuildMI(MF, DL, MCID).add(MO); +  if (IsIndirect) +    MIB.addImm(0U); +  else +    MIB.addReg(0U, RegState::Debug); +  return MIB.addMetadata(Variable).addMetadata(Expr); + } +  MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,                                    MachineBasicBlock::iterator I,                                    const DebugLoc &DL, const MCInstrDesc &MCID,                                    bool IsIndirect, unsigned Reg,                                    const MDNode *Variable, const MDNode *Expr) { -  assert(isa<DILocalVariable>(Variable) && "not a 
variable"); -  assert(cast<DIExpression>(Expr)->isValid() && "not an expression");    MachineFunction &MF = *BB.getParent();    MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Variable, Expr);    BB.insert(I, MI);    return MachineInstrBuilder(MF, MI);  } +MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, +                                  MachineBasicBlock::iterator I, +                                  const DebugLoc &DL, const MCInstrDesc &MCID, +                                  bool IsIndirect, MachineOperand &MO, +                                  const MDNode *Variable, const MDNode *Expr) { +  MachineFunction &MF = *BB.getParent(); +  MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, MO, Variable, Expr); +  BB.insert(I, MI); +  return MachineInstrBuilder(MF, *MI); +} +  /// Compute the new DIExpression to use with a DBG_VALUE for a spill slot.  /// This prepends DW_OP_deref when spilling an indirect DBG_VALUE.  static const DIExpression *computeExprForSpill(const MachineInstr &MI) { diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index 75d449c7ac6f..7332b7162030 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -71,6 +71,10 @@ SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",                         cl::desc("MachineLICM should sink instructions into "                                  "loops to avoid register spills"),                         cl::init(false), cl::Hidden); +static cl::opt<bool> +HoistConstStores("hoist-const-stores", +                 cl::desc("Hoist invariant stores"), +                 cl::init(true), cl::Hidden);  STATISTIC(NumHoisted,            "Number of machine instructions hoisted out of loops"); @@ -82,17 +86,19 @@ STATISTIC(NumCSEed,            "Number of hoisted machine instructions CSEed");  STATISTIC(NumPostRAHoisted,            "Number of machine instructions hoisted out of loops post regalloc"); +STATISTIC(NumStoreConst, +          "Number of stores of const phys reg hoisted out of loops");  namespace { -  class MachineLICM : public MachineFunctionPass { +  class MachineLICMBase : public MachineFunctionPass {      const TargetInstrInfo *TII;      const TargetLoweringBase *TLI;      const TargetRegisterInfo *TRI;      const MachineFrameInfo *MFI;      MachineRegisterInfo *MRI;      TargetSchedModel SchedModel; -    bool PreRegAlloc = true; +    bool PreRegAlloc;      // Various analyses that we use...      AliasAnalysis        *AA;      // Alias analysis info. 
@@ -138,16 +144,8 @@ namespace {      unsigned SpeculationState;    public: -    static char ID; // Pass identification, replacement for typeid - -    MachineLICM() : MachineFunctionPass(ID) { -      initializeMachineLICMPass(*PassRegistry::getPassRegistry()); -    } - -    explicit MachineLICM(bool PreRA) -        : MachineFunctionPass(ID), PreRegAlloc(PreRA) { -        initializeMachineLICMPass(*PassRegistry::getPassRegistry()); -    } +    MachineLICMBase(char &PassID, bool PreRegAlloc) +        : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}      bool runOnMachineFunction(MachineFunction &MF) override; @@ -218,7 +216,7 @@ namespace {          DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,          DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap); -    void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); +    void HoistOutOfLoop(MachineDomTreeNode *HeaderN);      void HoistRegion(MachineDomTreeNode *N, bool IsHeader); @@ -252,11 +250,29 @@ namespace {      MachineBasicBlock *getCurPreheader();    }; +  class MachineLICM : public MachineLICMBase { +  public: +    static char ID; +    MachineLICM() : MachineLICMBase(ID, false) { +      initializeMachineLICMPass(*PassRegistry::getPassRegistry()); +    } +  }; + +  class EarlyMachineLICM : public MachineLICMBase { +  public: +    static char ID; +    EarlyMachineLICM() : MachineLICMBase(ID, true) { +      initializeEarlyMachineLICMPass(*PassRegistry::getPassRegistry()); +    } +  }; +  } // end anonymous namespace -char MachineLICM::ID = 0; +char MachineLICM::ID; +char EarlyMachineLICM::ID;  char &llvm::MachineLICMID = MachineLICM::ID; +char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;  INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,                        "Machine Loop Invariant Code Motion", false, false) @@ -266,6 +282,14 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)  INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,                      "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm", +                      "Early Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm", +                    "Early Machine Loop Invariant Code Motion", false, false) +  /// Test if the given loop is the outer-most loop that has a unique predecessor.  static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {    // Check whether this loop even has a unique predecessor. 
@@ -279,7 +303,7 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
   return true;
 }
 
-bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
@@ -290,15 +314,15 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
   TRI = ST.getRegisterInfo();
   MFI = &MF.getFrameInfo();
   MRI = &MF.getRegInfo();
-  SchedModel.init(ST.getSchedModel(), &ST, TII);
+  SchedModel.init(&ST);
 
   PreRegAlloc = MRI->isSSA();
 
   if (PreRegAlloc)
-    DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
+    LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
   else
-    DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
-  DEBUG(dbgs() << MF.getName() << " ********\n");
+    LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+  LLVM_DEBUG(dbgs() << MF.getName() << " ********\n");
 
   if (PreRegAlloc) {
     // Estimate register pressure during pre-regalloc pass.
@@ -350,6 +374,10 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
 
 /// Return true if instruction stores to the specified frame.
 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+  // Check mayStore before the memory operands: otherwise instructions with no
+  // memory operands (e.g. DBG_VALUEs) would be conservatively treated below as
+  // writing to all slots.
+  if (!MI->mayStore())
+    return false;
   // If we lost memory operands, conservatively assume that the instruction
   // writes to all slots.
   if (MI->memoperands_empty())
@@ -368,11 +396,11 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
 
 /// Examine the instruction as a potential LICM candidate. Also
 /// gather register def and frame object update information.
-void MachineLICM::ProcessMI(MachineInstr *MI,
-                            BitVector &PhysRegDefs,
-                            BitVector &PhysRegClobbers,
-                            SmallSet<int, 32> &StoredFIs,
-                            SmallVectorImpl<CandidateInfo> &Candidates) {
+void MachineLICMBase::ProcessMI(MachineInstr *MI,
+                                BitVector &PhysRegDefs,
+                                BitVector &PhysRegClobbers,
+                                SmallSet<int, 32> &StoredFIs,
+                                SmallVectorImpl<CandidateInfo> &Candidates) {
   bool RuledOut = false;
   bool HasNonInvariantUse = false;
   unsigned Def = 0;
@@ -455,7 +483,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
 
 /// Walk the specified region of the CFG and hoist loop invariants out to the
 /// preheader.
-void MachineLICM::HoistRegionPostRA() {
+void MachineLICMBase::HoistRegionPostRA() {
   MachineBasicBlock *Preheader = getCurPreheader();
   if (!Preheader)
     return;
@@ -541,7 +569,7 @@ void MachineLICM::HoistRegionPostRA() {
 
 /// Add register 'Reg' to the livein sets of BBs in the current loop, and make
 /// sure it is not killed by any instructions in the loop.
-void MachineLICM::AddToLiveIns(unsigned Reg) {
+void MachineLICMBase::AddToLiveIns(unsigned Reg) {
   const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
   for (MachineBasicBlock *BB : Blocks) {
     if (!BB->isLiveIn(Reg))
@@ -558,13 +586,14 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
 
 /// When an instruction is found to use only loop invariant operands that are
 /// safe to hoist, this instruction is called to do the dirty work.
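+// Illustrative before/after (hypothetical MIR, not part of this patch): the
+// def of $x5 is loop invariant, so it is spliced into the preheader and $x5
+// is then added to the live-in set of every block in the loop:
+//
+//   loop:                            preheader:
+//     $x5 = <invariant def>    =>      $x5 = <invariant def>
+//     ... use $x5 ...                loop:              ; livein: $x5
+//                                      ... use $x5 ...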
-void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
   MachineBasicBlock *Preheader = getCurPreheader();
 
   // Now move the instructions to the predecessor, inserting it before any
   // terminator instructions.
-  DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader) << " from "
-               << printMBBReference(*MI->getParent()) << ": " << *MI);
+  LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader)
+                    << " from " << printMBBReference(*MI->getParent()) << ": "
+                    << *MI);
 
   // Splice the instruction to the preheader.
   MachineBasicBlock *MBB = MI->getParent();
@@ -581,7 +610,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
 
 /// Check if this mbb is guaranteed to execute. If not, then a load from this
 /// mbb may not be safe to hoist.
-bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
+bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
   if (SpeculationState != SpeculateUnknown)
     return SpeculationState == SpeculateFalse;
 
@@ -600,24 +629,24 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
   return true;
 }
 
-void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
-  DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
+void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
+  LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
 
   // Remember livein register pressure.
   BackTrace.push_back(RegPressure);
 }
 
-void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
-  DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
+void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) {
+  LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
   BackTrace.pop_back();
 }
 
 /// Destroy scope for the MBB that corresponds to the given dominator tree node
 /// if it's a leaf or all of its children are done. Walk up the dominator tree
 /// to destroy ancestors which are now done.
-void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
-                DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
-                DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node,
+    DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+    DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
   if (OpenChildren[Node])
     return;
 
@@ -638,7 +667,7 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
 /// specified header block, and that are in the current loop) in depth first
 /// order w.r.t the DominatorTree. This allows us to visit definitions before
 /// uses, allowing us to hoist a loop body in one pass without iteration.
-void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
+void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
   MachineBasicBlock *Preheader = getCurPreheader();
   if (!Preheader)
     return;
@@ -708,6 +737,8 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
       MachineInstr *MI = &*MII;
       if (!Hoist(MI, Preheader))
         UpdateRegPressure(MI);
+      // If we have hoisted an instruction that may store, it can only be a
+      // constant store.
       MII = NextMII;
     }
@@ -719,7 +750,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
 
 /// Sink instructions into loops if profitable. This especially tries to
 /// prevent register spills caused by register pressure if there is little to
 /// no overhead moving instructions into loops.
-void MachineLICM::SinkIntoLoop() {
+void MachineLICMBase::SinkIntoLoop() {
   MachineBasicBlock *Preheader = getCurPreheader();
   if (!Preheader)
     return;
@@ -773,7 +804,7 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
 
 /// Find all virtual register references that are liveout of the preheader to
 /// initialize the starting "register pressure". Note this does not count live
 /// through (livein but not used) registers.
-void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
+void MachineLICMBase::InitRegPressure(MachineBasicBlock *BB) {
   std::fill(RegPressure.begin(), RegPressure.end(), 0);
 
   // If the preheader has only a single predecessor and it ends with a
@@ -792,8 +823,8 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
 }
 
 /// Update estimate of register pressure after the specified instruction.
-void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
-                                    bool ConsiderUnseenAsDef) {
+void MachineLICMBase::UpdateRegPressure(const MachineInstr *MI,
+                                        bool ConsiderUnseenAsDef) {
   auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
   for (const auto &RPIdAndCost : Cost) {
     unsigned Class = RPIdAndCost.first;
@@ -811,8 +842,8 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
 /// figure out which usages are live-ins.
 /// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
 DenseMap<unsigned, int>
-MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
-                              bool ConsiderUnseenAsDef) {
+MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
+                                  bool ConsiderUnseenAsDef) {
   DenseMap<unsigned, int> Cost;
   if (MI->isImplicitDef())
     return Cost;
@@ -871,13 +902,86 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
   return false;
 }
 
+// This function iterates through all the operands of the input store MI and
+// checks that each register operand satisfies isCallerPreservedPhysReg.
+// This means the value being stored and the address where it is being stored
+// are constant throughout the body of the function (not including prologue
+// and epilogue). When called with an MI that isn't a store, it returns false.
+// A future improvement would be to check if the store registers are constant
+// throughout the loop rather than throughout the function.
+static bool isInvariantStore(const MachineInstr &MI,
+                             const TargetRegisterInfo *TRI,
+                             const MachineRegisterInfo *MRI) {
+
+  bool FoundCallerPresReg = false;
+  if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
+      (MI.getNumOperands() == 0))
+    return false;
+
+  // Check that all register operands are caller-preserved physical registers.
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isReg()) {
+      unsigned Reg = MO.getReg();
+      // If operand is a virtual register, check if it comes from a copy of a
+      // physical register.
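+      // e.g. (illustrative, PowerPC-style): for a store addressed off the TOC
+      // pointer,
+      //
+      //   %0 = COPY $x2            ; $x2 is caller-preserved
+      //   STD killed %1, 24, %0
+      //
+      // lookThruCopyLike(%0) yields $x2, so the address operand counts as
+      // constant for the whole function.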
+      if (TargetRegisterInfo::isVirtualRegister(Reg))
+        Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
+      if (TargetRegisterInfo::isVirtualRegister(Reg))
+        return false;
+      if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF()))
+        return false;
+      else
+        FoundCallerPresReg = true;
+    } else if (!MO.isImm()) {
+      return false;
+    }
+  }
+  return FoundCallerPresReg;
+}
+
+// Return true if the input MI is a copy instruction that feeds an invariant
+// store instruction. This means that the src of the copy has to satisfy
+// isCallerPreservedPhysReg and at least one of its users should satisfy
+// isInvariantStore.
+static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
+                                        const MachineRegisterInfo *MRI,
+                                        const TargetRegisterInfo *TRI) {
+
+  // FIXME: If targets would like to look through instructions that aren't
+  // pure copies, this can be updated to a query.
+  if (!MI.isCopy())
+    return false;
+
+  const MachineFunction *MF = MI.getMF();
+  // Check that we are copying a constant physical register.
+  unsigned CopySrcReg = MI.getOperand(1).getReg();
+  if (TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+    return false;
+
+  if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF))
+    return false;
+
+  unsigned CopyDstReg = MI.getOperand(0).getReg();
+  // Check if any of the uses of the copy are invariant stores.
+  assert(TargetRegisterInfo::isVirtualRegister(CopyDstReg) &&
+         "copy dst is not a virtual reg");
+
+  for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
+    if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
+      return true;
+  }
+  return false;
+}
+
 /// Returns true if the instruction may be a suitable candidate for LICM.
 /// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
-bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
   // Check if it's safe to move the instruction.
   bool DontMoveAcrossStore = true;
-  if (!I.isSafeToMove(AA, DontMoveAcrossStore))
+  if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
+      !(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
     return false;
+  }
 
   // If it is load then check if it is guaranteed to execute by making sure that
   // it dominates all exiting blocks. If it doesn't, then there is a path out of
@@ -896,7 +1000,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
 /// I.e., all virtual register operands are defined outside of the loop,
 /// physical registers aren't accessed explicitly, and there are no side
 /// effects that aren't captured by the operands or other flags.
-bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
   if (!IsLICMCandidate(I))
     return false;
 
@@ -949,7 +1053,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
 
 /// Return true if the specified instruction is used by a phi node and hoisting
 /// it could cause a copy to be inserted.
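+// For example (illustrative): if the hoisted value feeds a loop-header PHI,
+// the register allocator usually materializes that PHI as a copy on the back
+// edge, so hoisting buys nothing:
+//
+//   preheader:  %a = ...
+//   loop:       %p = PHI [ %a, preheader ], [ %b, loop ]
+//               %b = <op> %p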
-bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
+bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
   SmallVector<const MachineInstr*, 8> Work(1, MI);
   do {
     MI = Work.pop_back_val();
@@ -984,8 +1088,9 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
 
 /// Compute operand latency between a def of 'Reg' and a use in the current
 /// loop; return true if the target considers it high.
-bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
-                                        unsigned DefIdx, unsigned Reg) const {
+bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI,
+                                            unsigned DefIdx,
+                                            unsigned Reg) const {
   if (MRI->use_nodbg_empty(Reg))
     return false;
 
@@ -1015,7 +1120,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
 
 /// Return true if the instruction is marked "cheap" or the operand latency
 /// between its def and a use is one or less.
-bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
+bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const {
   if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
     return true;
 
@@ -1040,8 +1145,9 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
 
 /// Visit BBs from header to current BB, check if hoisting an instruction with
 /// the given cost matrix can cause high register pressure.
-bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
-                                          bool CheapInstr) {
+bool
+MachineLICMBase::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
+                                         bool CheapInstr) {
   for (const auto &RPIdAndCost : Cost) {
     if (RPIdAndCost.second <= 0)
       continue;
@@ -1065,7 +1171,7 @@ bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
 
 /// Traverse the back trace from header to the current block and update their
 /// register pressures to reflect the effect of hoisting MI from the current
 /// block to the preheader.
-void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) {
   // First compute the 'cost' of the instruction, i.e. its contribution
   // to register pressure.
   auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
@@ -1079,7 +1185,7 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
 
 /// Return true if it is potentially profitable to hoist the given loop
 /// invariant.
-bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
   if (MI.isImplicitDef())
     return true;
 
@@ -1095,12 +1201,15 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
   // - When hoisting the last use of a value in the loop, that value no longer
   //   needs to be live in the loop. This lowers register pressure in the loop.
 
+  if (HoistConstStores && isCopyFeedingInvariantStore(MI, MRI, TRI))
+    return true;
+
   bool CheapInstr = IsCheapInstruction(MI);
   bool CreatesCopy = HasLoopPHIUse(&MI);
 
   // Don't hoist a cheap instruction if it would create a copy in the loop.
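+  // Illustrative tradeoff (not part of this patch): hoisting a
+  // rematerializable `%x = MOVi 1` saves at most one cheap instruction per
+  // iteration, but if %x also feeds a loop PHI the allocator inserts
+  // `%p = COPY %x` inside the loop, trading a cheap def for a copy plus a
+  // longer live range.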
   if (CheapInstr && CreatesCopy) {
-    DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
+    LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
     return false;
   }
 
@@ -1119,7 +1228,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
     if (!TargetRegisterInfo::isVirtualRegister(Reg))
       continue;
     if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
-      DEBUG(dbgs() << "Hoist High Latency: " << MI);
+      LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
       ++NumHighLatency;
       return true;
     }
@@ -1137,14 +1246,14 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
   // Visit BBs from header to current BB, if hoisting this doesn't cause
   // high register pressure, then it's safe to proceed.
   if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
-    DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
+    LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
     ++NumLowRP;
     return true;
   }
 
   // Don't risk increasing register pressure if it would create copies.
   if (CreatesCopy) {
-    DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
+    LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
     return false;
   }
 
@@ -1153,7 +1262,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
   // conservative.
   if (AvoidSpeculation &&
       (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
-    DEBUG(dbgs() << "Won't speculate: " << MI);
+    LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
     return false;
   }
 
@@ -1161,7 +1270,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
   // to be remat'ed.
   if (!TII->isTriviallyReMaterializable(MI, AA) &&
       !MI.isDereferenceableInvariantLoad(AA)) {
-    DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
+    LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
     return false;
   }
 
@@ -1171,7 +1280,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
 
 /// Unfold a load from the given MachineInstr if the load itself could be
 /// hoisted. Return the unfolded and hoistable load, or null if the load
 /// couldn't be unfolded or if it wouldn't be hoistable.
-MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
   // Don't unfold simple loads.
   if (MI->canFoldAsLoad())
     return nullptr;
@@ -1229,7 +1338,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
 
 /// Initialize the CSE map with instructions that are in the current loop
 /// preheader that may become duplicates of instructions that are hoisted
 /// out of the loop.
-void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
+void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
   for (MachineInstr &MI : *BB)
     CSEMap[MI.getOpcode()].push_back(&MI);
 }
@@ -1237,8 +1346,8 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
 
 /// Find an instruction among PrevMIs that is a duplicate of MI.
 /// Return this instruction if it's found.
 const MachineInstr*
-MachineLICM::LookForDuplicate(const MachineInstr *MI,
-                              std::vector<const MachineInstr*> &PrevMIs) {
+MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
+                                  std::vector<const MachineInstr*> &PrevMIs) {
   for (const MachineInstr *PrevMI : PrevMIs)
     if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
       return PrevMI;
@@ -1250,15 +1359,15 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
 /// computes the same value. If it's found, do a RAU with the definition of
 /// the existing instruction rather than hoisting the instruction to the
 /// preheader.
-bool MachineLICM::EliminateCSE(MachineInstr *MI,
-          DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) {
+bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
+    DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) {
   // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
   // the undef property onto uses.
   if (CI == CSEMap.end() || MI->isImplicitDef())
     return false;
 
   if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
-    DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+    LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
 
     // Replace virtual registers defined by MI by their counterparts defined
     // by Dup.
@@ -1308,7 +1417,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
 
 /// Return true if the given instruction will be CSE'd if it's hoisted out of
 /// the loop.
-bool MachineLICM::MayCSE(MachineInstr *MI) {
+bool MachineLICMBase::MayCSE(MachineInstr *MI) {
   unsigned Opcode = MI->getOpcode();
   DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
     CI = CSEMap.find(Opcode);
@@ -1323,7 +1432,7 @@ bool MachineLICM::MayCSE(MachineInstr *MI) {
 
 /// When an instruction is found to use only loop invariant operands
 /// that are safe to hoist, this instruction is called to do the dirty work.
 /// It returns true if the instruction is hoisted.
-bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
   // First check whether we should hoist this instruction.
   if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
     // If not, try unfolding a hoistable load.
@@ -1331,16 +1440,21 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
     if (!MI) return false;
   }
 
+  // If we have hoisted an instruction that may store, it can only be a
+  // constant store.
+  if (MI->mayStore())
+    NumStoreConst++;
+
   // Now move the instructions to the predecessor, inserting it before any
   // terminator instructions.
-  DEBUG({
-      dbgs() << "Hoisting " << *MI;
-      if (MI->getParent()->getBasicBlock())
-        dbgs() << " from " << printMBBReference(*MI->getParent());
-      if (Preheader->getBasicBlock())
-        dbgs() << " to " << printMBBReference(*Preheader);
-      dbgs() << "\n";
-    });
+  LLVM_DEBUG({
+    dbgs() << "Hoisting " << *MI;
+    if (MI->getParent()->getBasicBlock())
+      dbgs() << " from " << printMBBReference(*MI->getParent());
+    if (Preheader->getBasicBlock())
+      dbgs() << " to " << printMBBReference(*Preheader);
+    dbgs() << "\n";
+  });
 
   // If this is the first instruction being hoisted to the preheader,
   // initialize the CSE map with potential common expressions.
@@ -1386,7 +1500,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
 }
 
 /// Get the preheader for the current loop, splitting a critical edge if needed.
-MachineBasicBlock *MachineLICM::getCurPreheader() {
+MachineBasicBlock *MachineLICMBase::getCurPreheader() {
   // Determine the block to which to hoist instructions. If we can't find a
   // suitable loop predecessor, we can't do any hoisting.
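A condensed sketch of the candidacy gate added in this patch (illustrative only; it reuses the isInvariantStore helper and HoistConstStores option introduced above and is not itself part of the diff):

static bool isLICMCandidateWithConstStores(MachineInstr &I, AliasAnalysis *AA,
                                           const TargetRegisterInfo *TRI,
                                           const MachineRegisterInfo *MRI,
                                           bool HoistConstStores) {
  // A store is normally never safe to move, but an invariant store of
  // caller-preserved physical registers is now allowed through.
  bool DontMoveAcrossStore = true;
  return I.isSafeToMove(AA, DontMoveAcrossStore) ||
         (HoistConstStores && isInvariantStore(I, TRI, MRI));
}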
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp index a9aa1d954e70..2bce59235057 100644 --- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -18,6 +18,7 @@  #include "llvm/Analysis/LoopInfoImpl.h"  #include "llvm/CodeGen/MachineDominators.h"  #include "llvm/CodeGen/Passes.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h"  using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 8f0b89657d02..054cc97f8374 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -15,7 +15,6 @@  #include "llvm/ADT/TinyPtrVector.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/DerivedTypes.h"  #include "llvm/IR/Instructions.h" @@ -27,6 +26,7 @@  #include "llvm/Pass.h"  #include "llvm/Support/Casting.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include "llvm/Target/TargetMachine.h"  #include <algorithm>  #include <cassert> diff --git a/contrib/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm/lib/CodeGen/MachineOperand.cpp index ec81c6391171..8098333832b4 100644 --- a/contrib/llvm/lib/CodeGen/MachineOperand.cpp +++ b/contrib/llvm/lib/CodeGen/MachineOperand.cpp @@ -12,6 +12,7 @@  //===----------------------------------------------------------------------===//  #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/ADT/StringExtras.h"  #include "llvm/Analysis/Loads.h"  #include "llvm/CodeGen/MIRPrinter.h"  #include "llvm/CodeGen/MachineFrameInfo.h" @@ -19,6 +20,7 @@  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/IRPrintingPasses.h"  #include "llvm/IR/ModuleSlotTracker.h" @@ -50,6 +52,9 @@ void MachineOperand::setReg(unsigned Reg) {    if (getReg() == Reg)      return; // No change. +  // Clear the IsRenamable bit to keep it conservatively correct. +  IsRenamable = false; +    // Otherwise, we have to change the register.  If this operand is embedded    // into a machine function, we need to update the old and new register's    // use/def lists. 
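Why the new setReg() above clears IsRenamable (a hedged illustration; MI and NewPhysReg are hypothetical, and this snippet is not part of the diff):

  MachineOperand &MO = MI.getOperand(0);
  MO.setIsRenamable(true); // a promise made for the register MO holds now
  MO.setReg(NewPhysReg);   // constraints on NewPhysReg are unknown here, so
                           // the renamable bit is conservatively dropped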
@@ -110,30 +115,27 @@ bool MachineOperand::isRenamable() const {    assert(isReg() && "Wrong MachineOperand accessor");    assert(TargetRegisterInfo::isPhysicalRegister(getReg()) &&           "isRenamable should only be checked on physical registers"); -  return IsRenamable; +  if (!IsRenamable) +    return false; + +  const MachineInstr *MI = getParent(); +  if (!MI) +    return true; + +  if (isDef()) +    return !MI->hasExtraDefRegAllocReq(MachineInstr::IgnoreBundle); + +  assert(isUse() && "Reg is not def or use"); +  return !MI->hasExtraSrcRegAllocReq(MachineInstr::IgnoreBundle);  }  void MachineOperand::setIsRenamable(bool Val) {    assert(isReg() && "Wrong MachineOperand accessor");    assert(TargetRegisterInfo::isPhysicalRegister(getReg()) &&           "setIsRenamable should only be called on physical registers"); -  if (const MachineInstr *MI = getParent()) -    if ((isDef() && MI->hasExtraDefRegAllocReq()) || -        (isUse() && MI->hasExtraSrcRegAllocReq())) -      assert(!Val && "isRenamable should be false for " -                     "hasExtraDefRegAllocReq/hasExtraSrcRegAllocReq opcodes");    IsRenamable = Val;  } -void MachineOperand::setIsRenamableIfNoExtraRegAllocReq() { -  if (const MachineInstr *MI = getParent()) -    if ((isDef() && MI->hasExtraDefRegAllocReq()) || -        (isUse() && MI->hasExtraSrcRegAllocReq())) -      return; - -  setIsRenamable(true); -} -  // If this operand is currently a register operand, and if this is in a  // function, deregister the operand from the register's use/def list.  void MachineOperand::removeRegFromUses() { @@ -440,7 +442,70 @@ static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB,      OS << "<unknown>";  } -void MachineOperand::printSubregIdx(raw_ostream &OS, uint64_t Index, +static void printIRValueReference(raw_ostream &OS, const Value &V, +                                  ModuleSlotTracker &MST) { +  if (isa<GlobalValue>(V)) { +    V.printAsOperand(OS, /*PrintType=*/false, MST); +    return; +  } +  if (isa<Constant>(V)) { +    // Machine memory operands can load/store to/from constant value pointers. 
+    OS << '`'; +    V.printAsOperand(OS, /*PrintType=*/true, MST); +    OS << '`'; +    return; +  } +  OS << "%ir."; +  if (V.hasName()) { +    printLLVMNameWithoutPrefix(OS, V.getName()); +    return; +  } +  MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V)); +} + +static void printSyncScope(raw_ostream &OS, const LLVMContext &Context, +                           SyncScope::ID SSID, +                           SmallVectorImpl<StringRef> &SSNs) { +  switch (SSID) { +  case SyncScope::System: +    break; +  default: +    if (SSNs.empty()) +      Context.getSyncScopeNames(SSNs); + +    OS << "syncscope(\""; +    printEscapedString(SSNs[SSID], OS); +    OS << "\") "; +    break; +  } +} + +static const char *getTargetMMOFlagName(const TargetInstrInfo &TII, +                                        unsigned TMMOFlag) { +  auto Flags = TII.getSerializableMachineMemOperandTargetFlags(); +  for (const auto &I : Flags) { +    if (I.first == TMMOFlag) { +      return I.second; +    } +  } +  return nullptr; +} + +static void printFrameIndex(raw_ostream& OS, int FrameIndex, bool IsFixed, +                            const MachineFrameInfo *MFI) { +  StringRef Name; +  if (MFI) { +    IsFixed = MFI->isFixedObjectIndex(FrameIndex); +    if (const AllocaInst *Alloca = MFI->getObjectAllocation(FrameIndex)) +      if (Alloca->hasName()) +        Name = Alloca->getName(); +    if (IsFixed) +      FrameIndex -= MFI->getObjectIndexBegin(); +  } +  MachineOperand::printStackObjectReference(OS, FrameIndex, IsFixed, Name); +} + +void MachineOperand::printSubRegIdx(raw_ostream &OS, uint64_t Index,                                      const TargetRegisterInfo *TRI) {    OS << "%subreg.";    if (TRI) @@ -639,15 +704,21 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI,  void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI,                             const TargetIntrinsicInfo *IntrinsicInfo) const { +  print(OS, LLT{}, TRI, IntrinsicInfo); +} + +void MachineOperand::print(raw_ostream &OS, LLT TypeToPrint, +                           const TargetRegisterInfo *TRI, +                           const TargetIntrinsicInfo *IntrinsicInfo) const {    tryToGetTargetInfo(*this, TRI, IntrinsicInfo);    ModuleSlotTracker DummyMST(nullptr); -  print(OS, DummyMST, LLT{}, /*PrintDef=*/false, +  print(OS, DummyMST, TypeToPrint, /*PrintDef=*/false, /*IsStandalone=*/true,          /*ShouldPrintRegisterTies=*/true,          /*TiedOperandIdx=*/0, TRI, IntrinsicInfo);  }  void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, -                           LLT TypeToPrint, bool PrintDef, +                           LLT TypeToPrint, bool PrintDef, bool IsStandalone,                             bool ShouldPrintRegisterTies,                             unsigned TiedOperandIdx,                             const TargetRegisterInfo *TRI, @@ -675,7 +746,15 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,        OS << "debug-use ";      if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable())        OS << "renamable "; -    OS << printReg(Reg, TRI); + +    const MachineRegisterInfo *MRI = nullptr; +    if (TargetRegisterInfo::isVirtualRegister(Reg)) { +      if (const MachineFunction *MF = getMFIfAvailable(*this)) { +        MRI = &MF->getRegInfo(); +      } +    } + +    OS << printReg(Reg, TRI, 0, MRI);      // Print the sub register.      
if (unsigned SubReg = getSubReg()) {        if (TRI) @@ -687,7 +766,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,      if (TargetRegisterInfo::isVirtualRegister(Reg)) {        if (const MachineFunction *MF = getMFIfAvailable(*this)) {          const MachineRegisterInfo &MRI = MF->getRegInfo(); -        if (!PrintDef || MRI.def_empty(Reg)) { +        if (IsStandalone || !PrintDef || MRI.def_empty(Reg)) {            OS << ':';            OS << printRegClassOrBank(Reg, MRI, TRI);          } @@ -716,17 +795,10 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,    case MachineOperand::MO_FrameIndex: {      int FrameIndex = getIndex();      bool IsFixed = false; -    StringRef Name; -    if (const MachineFunction *MF = getMFIfAvailable(*this)) { -      const MachineFrameInfo &MFI = MF->getFrameInfo(); -      IsFixed = MFI.isFixedObjectIndex(FrameIndex); -      if (const AllocaInst *Alloca = MFI.getObjectAllocation(FrameIndex)) -        if (Alloca->hasName()) -          Name = Alloca->getName(); -      if (IsFixed) -        FrameIndex -= MFI.getObjectIndexBegin(); -    } -    printStackObjectReference(OS, FrameIndex, IsFixed, Name); +    const MachineFrameInfo *MFI = nullptr; +    if (const MachineFunction *MF = getMFIfAvailable(*this)) +      MFI = &MF->getFrameInfo(); +    printFrameIndex(OS, FrameIndex, IsFixed, MFI);      break;    }    case MachineOperand::MO_ConstantPoolIndex: @@ -752,7 +824,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,      break;    case MachineOperand::MO_ExternalSymbol: {      StringRef Name = getSymbolName(); -    OS << '$'; +    OS << '&';      if (Name.empty()) {        OS << "\"\"";      } else { @@ -905,7 +977,7 @@ MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) {  }  MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, -                                     uint64_t s, unsigned int a, +                                     uint64_t s, uint64_t a,                                       const AAMDNodes &AAInfo,                                       const MDNode *Ranges, SyncScope::ID SSID,                                       AtomicOrdering Ordering, @@ -961,108 +1033,121 @@ void MachineMemOperand::print(raw_ostream &OS) const {    ModuleSlotTracker DummyMST(nullptr);    print(OS, DummyMST);  } +  void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { -  assert((isLoad() || isStore()) && "SV has to be a load, store or both."); +  SmallVector<StringRef, 0> SSNs; +  LLVMContext Ctx; +  print(OS, MST, SSNs, Ctx, nullptr, nullptr); +} +void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, +                              SmallVectorImpl<StringRef> &SSNs, +                              const LLVMContext &Context, +                              const MachineFrameInfo *MFI, +                              const TargetInstrInfo *TII) const { +  OS << '(';    if (isVolatile()) -    OS << "Volatile "; - +    OS << "volatile "; +  if (isNonTemporal()) +    OS << "non-temporal "; +  if (isDereferenceable()) +    OS << "dereferenceable "; +  if (isInvariant()) +    OS << "invariant "; +  if (getFlags() & MachineMemOperand::MOTargetFlag1) +    OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag1) +       << "\" "; +  if (getFlags() & MachineMemOperand::MOTargetFlag2) +    OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag2) +       << "\" "; +  if (getFlags() & 
MachineMemOperand::MOTargetFlag3) +    OS << '"' << getTargetMMOFlagName(*TII, MachineMemOperand::MOTargetFlag3) +       << "\" "; + +  assert((isLoad() || isStore()) && +         "machine memory operand must be a load or store (or both)");    if (isLoad()) -    OS << "LD"; +    OS << "load ";    if (isStore()) -    OS << "ST"; -  OS << getSize(); +    OS << "store "; -  // Print the address information. -  OS << "["; -  if (const Value *V = getValue()) -    V->printAsOperand(OS, /*PrintType=*/false, MST); -  else if (const PseudoSourceValue *PSV = getPseudoValue()) -    PSV->printCustom(OS); -  else -    OS << "<unknown>"; +  printSyncScope(OS, Context, getSyncScopeID(), SSNs); -  unsigned AS = getAddrSpace(); -  if (AS != 0) -    OS << "(addrspace=" << AS << ')'; - -  // If the alignment of the memory reference itself differs from the alignment -  // of the base pointer, print the base alignment explicitly, next to the base -  // pointer. -  if (getBaseAlignment() != getAlignment()) -    OS << "(align=" << getBaseAlignment() << ")"; - -  if (getOffset() != 0) -    OS << "+" << getOffset(); -  OS << "]"; - -  // Print the alignment of the reference. -  if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize()) -    OS << "(align=" << getAlignment() << ")"; - -  // Print TBAA info. -  if (const MDNode *TBAAInfo = getAAInfo().TBAA) { -    OS << "(tbaa="; -    if (TBAAInfo->getNumOperands() > 0) -      TBAAInfo->getOperand(0)->printAsOperand(OS, MST); -    else -      OS << "<unknown>"; -    OS << ")"; -  } +  if (getOrdering() != AtomicOrdering::NotAtomic) +    OS << toIRString(getOrdering()) << ' '; +  if (getFailureOrdering() != AtomicOrdering::NotAtomic) +    OS << toIRString(getFailureOrdering()) << ' '; -  // Print AA scope info. -  if (const MDNode *ScopeInfo = getAAInfo().Scope) { -    OS << "(alias.scope="; -    if (ScopeInfo->getNumOperands() > 0) -      for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) { -        ScopeInfo->getOperand(i)->printAsOperand(OS, MST); -        if (i != ie - 1) -          OS << ","; -      } -    else -      OS << "<unknown>"; -    OS << ")"; +  OS << getSize(); +  if (const Value *Val = getValue()) { +    OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into "); +    printIRValueReference(OS, *Val, MST); +  } else if (const PseudoSourceValue *PVal = getPseudoValue()) { +    OS << ((isLoad() && isStore()) ? " on " : isLoad() ? 
" from " : " into "); +    assert(PVal && "Expected a pseudo source value"); +    switch (PVal->kind()) { +    case PseudoSourceValue::Stack: +      OS << "stack"; +      break; +    case PseudoSourceValue::GOT: +      OS << "got"; +      break; +    case PseudoSourceValue::JumpTable: +      OS << "jump-table"; +      break; +    case PseudoSourceValue::ConstantPool: +      OS << "constant-pool"; +      break; +    case PseudoSourceValue::FixedStack: { +      int FrameIndex = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex(); +      bool IsFixed = true; +      printFrameIndex(OS, FrameIndex, IsFixed, MFI); +      break; +    } +    case PseudoSourceValue::GlobalValueCallEntry: +      OS << "call-entry "; +      cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand( +          OS, /*PrintType=*/false, MST); +      break; +    case PseudoSourceValue::ExternalSymbolCallEntry: +      OS << "call-entry &"; +      printLLVMNameWithoutPrefix( +          OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol()); +      break; +    case PseudoSourceValue::TargetCustom: +      // FIXME: This is not necessarily the correct MIR serialization format for +      // a custom pseudo source value, but at least it allows +      // -print-machineinstrs to work on a target with custom pseudo source +      // values. +      OS << "custom "; +      PVal->printCustom(OS); +      break; +    }    } - -  // Print AA noalias scope info. -  if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) { -    OS << "(noalias="; -    if (NoAliasInfo->getNumOperands() > 0) -      for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) { -        NoAliasInfo->getOperand(i)->printAsOperand(OS, MST); -        if (i != ie - 1) -          OS << ","; -      } -    else -      OS << "<unknown>"; -    OS << ")"; +  MachineOperand::printOperandOffset(OS, getOffset()); +  if (getBaseAlignment() != getSize()) +    OS << ", align " << getBaseAlignment(); +  auto AAInfo = getAAInfo(); +  if (AAInfo.TBAA) { +    OS << ", !tbaa "; +    AAInfo.TBAA->printAsOperand(OS, MST);    } - -  if (const MDNode *Ranges = getRanges()) { -    unsigned NumRanges = Ranges->getNumOperands(); -    if (NumRanges != 0) { -      OS << "(ranges="; - -      for (unsigned I = 0; I != NumRanges; ++I) { -        Ranges->getOperand(I)->printAsOperand(OS, MST); -        if (I != NumRanges - 1) -          OS << ','; -      } - -      OS << ')'; -    } +  if (AAInfo.Scope) { +    OS << ", !alias.scope "; +    AAInfo.Scope->printAsOperand(OS, MST);    } +  if (AAInfo.NoAlias) { +    OS << ", !noalias "; +    AAInfo.NoAlias->printAsOperand(OS, MST); +  } +  if (getRanges()) { +    OS << ", !range "; +    getRanges()->printAsOperand(OS, MST); +  } +  // FIXME: Implement addrspace printing/parsing in MIR. +  // For now, print this even though parsing it is not available in MIR. 
+  if (unsigned AS = getAddrSpace()) +    OS << ", addrspace " << AS; -  if (isNonTemporal()) -    OS << "(nontemporal)"; -  if (isDereferenceable()) -    OS << "(dereferenceable)"; -  if (isInvariant()) -    OS << "(invariant)"; -  if (getFlags() & MOTargetFlag1) -    OS << "(flag1)"; -  if (getFlags() & MOTargetFlag2) -    OS << "(flag2)"; -  if (getFlags() & MOTargetFlag3) -    OS << "(flag3)"; +  OS << ')';  } diff --git a/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index ca4452218da1..906d5560d568 100644 --- a/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -27,7 +27,8 @@ DiagnosticInfoMIROptimization::MachineArgument::MachineArgument(    Key = MKey;    raw_string_ostream OS(Val); -  MI.print(OS, /*SkipOpers=*/false, /*SkipDebugLoc=*/true); +  MI.print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false, +           /*SkipDebugLoc=*/true);  }  Optional<uint64_t> diff --git a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp index e4eb8802ac66..28e4e2c6c87a 100644 --- a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp @@ -25,9 +25,8 @@  ///  /// Targets must implement  ///   * getOutliningCandidateInfo -///   * insertOutlinerEpilogue +///   * buildOutlinedFrame  ///   * insertOutlinedCall -///   * insertOutlinerPrologue  ///   * isFunctionSafeToOutlineFrom  ///  /// in order to make use of the MachineOutliner. @@ -56,18 +55,22 @@  /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf  ///  //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineOutliner.h"  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/Twine.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineModuleInfo.h"  #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/Passes.h"  #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DIBuilder.h"  #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Mangler.h"  #include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h"  #include <functional> @@ -80,121 +83,23 @@  using namespace llvm;  using namespace ore; +using namespace outliner;  STATISTIC(NumOutlined, "Number of candidates outlined");  STATISTIC(FunctionsCreated, "Number of functions created"); -namespace { - -/// \brief An individual sequence of instructions to be replaced with a call to -/// an outlined function. -struct Candidate { -private: -  /// The start index of this \p Candidate in the instruction list. -  unsigned StartIdx; - -  /// The number of instructions in this \p Candidate. -  unsigned Len; - -public: -  /// Set to false if the candidate overlapped with another candidate. -  bool InCandidateList = true; - -  /// \brief The index of this \p Candidate's \p OutlinedFunction in the list of -  /// \p OutlinedFunctions. -  unsigned FunctionIdx; - -  /// Contains all target-specific information for this \p Candidate. -  TargetInstrInfo::MachineOutlinerInfo MInfo; - -  /// Return the number of instructions in this Candidate. 
-  unsigned getLength() const { return Len; } - -  /// Return the start index of this candidate. -  unsigned getStartIdx() const { return StartIdx; } - -  // Return the end index of this candidate. -  unsigned getEndIdx() const { return StartIdx + Len - 1; } - -  /// \brief The number of instructions that would be saved by outlining every -  /// candidate of this type. -  /// -  /// This is a fixed value which is not updated during the candidate pruning -  /// process. It is only used for deciding which candidate to keep if two -  /// candidates overlap. The true benefit is stored in the OutlinedFunction -  /// for some given candidate. -  unsigned Benefit = 0; - -  Candidate(unsigned StartIdx, unsigned Len, unsigned FunctionIdx) -      : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {} - -  Candidate() {} - -  /// \brief Used to ensure that \p Candidates are outlined in an order that -  /// preserves the start and end indices of other \p Candidates. -  bool operator<(const Candidate &RHS) const { -    return getStartIdx() > RHS.getStartIdx(); -  } -}; - -/// \brief The information necessary to create an outlined function for some -/// class of candidate. -struct OutlinedFunction { - -private: -  /// The number of candidates for this \p OutlinedFunction. -  unsigned OccurrenceCount = 0; - -public: -  std::vector<std::shared_ptr<Candidate>> Candidates; - -  /// The actual outlined function created. -  /// This is initialized after we go through and create the actual function. -  MachineFunction *MF = nullptr; - -  /// A number assigned to this function which appears at the end of its name. -  unsigned Name; +// Set to true if the user wants the outliner to run on linkonceodr linkage +// functions. This is false by default because the linker can dedupe linkonceodr +// functions. Since the outliner is confined to a single module (modulo LTO), +// this is off by default. It should, however, be the default behaviour in +// LTO. +static cl::opt<bool> EnableLinkOnceODROutlining( +    "enable-linkonceodr-outlining", +    cl::Hidden, +    cl::desc("Enable the machine outliner on linkonceodr functions"), +    cl::init(false)); -  /// \brief The sequence of integers corresponding to the instructions in this -  /// function. -  std::vector<unsigned> Sequence; - -  /// Contains all target-specific information for this \p OutlinedFunction. -  TargetInstrInfo::MachineOutlinerInfo MInfo; - -  /// Return the number of candidates for this \p OutlinedFunction. -  unsigned getOccurrenceCount() { return OccurrenceCount; } - -  /// Decrement the occurrence count of this OutlinedFunction and return the -  /// new count. -  unsigned decrement() { -    assert(OccurrenceCount > 0 && "Can't decrement an empty function!"); -    OccurrenceCount--; -    return getOccurrenceCount(); -  } - -  /// \brief Return the number of instructions it would take to outline this -  /// function. -  unsigned getOutliningCost() { -    return (OccurrenceCount * MInfo.CallOverhead) + Sequence.size() + -           MInfo.FrameOverhead; -  } - -  /// \brief Return the number of instructions that would be saved by outlining -  /// this function. -  unsigned getBenefit() { -    unsigned NotOutlinedCost = OccurrenceCount * Sequence.size(); -    unsigned OutlinedCost = getOutliningCost(); -    return (NotOutlinedCost < OutlinedCost) ? 
0 -                                            : NotOutlinedCost - OutlinedCost; -  } - -  OutlinedFunction(unsigned Name, unsigned OccurrenceCount, -                   const std::vector<unsigned> &Sequence, -                   TargetInstrInfo::MachineOutlinerInfo &MInfo) -      : OccurrenceCount(OccurrenceCount), Name(Name), Sequence(Sequence), -        MInfo(MInfo) {} -}; +namespace {  /// Represents an undefined index in the suffix tree.  const unsigned EmptyIdx = -1; @@ -242,7 +147,7 @@ struct SuffixTreeNode {    /// For all other nodes, this is ignored.    unsigned SuffixIdx = EmptyIdx; -  /// \brief For internal nodes, a pointer to the internal node representing +  /// For internal nodes, a pointer to the internal node representing    /// the same sequence with the first character chopped off.    ///    /// This acts as a shortcut in Ukkonen's algorithm. One of the things that @@ -356,7 +261,7 @@ private:    /// The end index of each leaf in the tree.    unsigned LeafEndIdx = -1; -  /// \brief Helper struct which keeps track of the next insertion point in +  /// Helper struct which keeps track of the next insertion point in    /// Ukkonen's algorithm.    struct ActiveState {      /// The next node to insert at. @@ -369,7 +274,7 @@ private:      unsigned Len = 0;    }; -  /// \brief The point the next insertion will take place at in the +  /// The point the next insertion will take place at in the    /// construction algorithm.    ActiveState Active; @@ -416,7 +321,7 @@ private:      return N;    } -  /// \brief Set the suffix indices of the leaves to the start indices of their +  /// Set the suffix indices of the leaves to the start indices of their    /// respective suffixes. Also stores each leaf in \p LeafVector at its    /// respective suffix index.    /// @@ -454,7 +359,7 @@ private:      }    } -  /// \brief Construct the suffix tree for the prefix of the input ending at +  /// Construct the suffix tree for the prefix of the input ending at    /// \p EndIdx.    ///    /// Used to construct the full suffix tree iteratively. At the end of each @@ -615,16 +520,16 @@ public:    }  }; -/// \brief Maps \p MachineInstrs to unsigned integers and stores the mappings. +/// Maps \p MachineInstrs to unsigned integers and stores the mappings.  struct InstructionMapper { -  /// \brief The next available integer to assign to a \p MachineInstr that +  /// The next available integer to assign to a \p MachineInstr that    /// cannot be outlined.    ///    /// Set to -3 for compatibility with \p DenseMapInfo<unsigned>.    unsigned IllegalInstrNumber = -3; -  /// \brief The next available integer to assign to a \p MachineInstr that can +  /// The next available integer to assign to a \p MachineInstr that can    /// be outlined.    unsigned LegalInstrNumber = 0; @@ -639,11 +544,11 @@ struct InstructionMapper {    /// The vector of unsigned integers that the module is mapped to.    std::vector<unsigned> UnsignedVec; -  /// \brief Stores the location of the instruction associated with the integer +  /// Stores the location of the instruction associated with the integer    /// at index i in \p UnsignedVec for each index i.    std::vector<MachineBasicBlock::iterator> InstrList; -  /// \brief Maps \p *It to a legal integer. +  /// Maps \p *It to a legal integer.    ///    /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap,    /// \p IntegerInstructionMap, and \p LegalInstrNumber.
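As a rough standalone sketch of the mapping scheme documented above, with strings standing in for MachineInstrs: identical legal instructions share one small ascending integer, while every illegal instruction receives a fresh integer counting down from (unsigned)-3, so no two illegal slots can ever match each other (or anything else) in the suffix tree. The MapOne helper and driver values are hypothetical.

    #include <map>
    #include <string>
    #include <vector>
    #include <cstdio>

    int main() {
      std::map<std::string, unsigned> InstructionIntegerMap;
      unsigned LegalInstrNumber = 0;
      unsigned IllegalInstrNumber = (unsigned)-3;
      std::vector<unsigned> UnsignedVec;

      auto MapOne = [&](const std::string &MI, bool Legal) {
        if (!Legal) {
          UnsignedVec.push_back(IllegalInstrNumber--); // unique every time
          return;
        }
        auto It = InstructionIntegerMap.find(MI);
        if (It == InstructionIntegerMap.end())
          It = InstructionIntegerMap.insert({MI, LegalInstrNumber++}).first;
        UnsignedVec.push_back(It->second); // identical instrs share an integer
      };

      MapOne("add x0, x1", true);
      MapOne("call foo", false);  // e.g. position-dependent: illegal
      MapOne("add x0, x1", true); // maps to the same integer as the first add
      for (unsigned U : UnsignedVec)
        std::printf("%u ", U);
      std::printf("\n");          // prints: 0 4294967293 0
    }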
@@ -706,7 +611,7 @@ struct InstructionMapper {      return MINumber;    } -  /// \brief Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds +  /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds    /// and appends it to \p UnsignedVec and \p InstrList.    ///    /// Two instructions are assigned the same integer if they are identical. @@ -720,20 +625,29 @@ struct InstructionMapper {    void convertToUnsignedVec(MachineBasicBlock &MBB,                              const TargetRegisterInfo &TRI,                              const TargetInstrInfo &TII) { +    unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB); +      for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et;           It++) {        // Keep track of where this instruction is in the module. -      switch (TII.getOutliningType(*It)) { -      case TargetInstrInfo::MachineOutlinerInstrType::Illegal: +      switch (TII.getOutliningType(It, Flags)) { +      case InstrType::Illegal:          mapToIllegalUnsigned(It);          break; -      case TargetInstrInfo::MachineOutlinerInstrType::Legal: +      case InstrType::Legal:          mapToLegalUnsigned(It);          break; -      case TargetInstrInfo::MachineOutlinerInstrType::Invisible: +      case InstrType::LegalTerminator: +        mapToLegalUnsigned(It); +        InstrList.push_back(It); +        UnsignedVec.push_back(IllegalInstrNumber); +        IllegalInstrNumber--; +        break; + +      case InstrType::Invisible:          break;        }      } @@ -757,7 +671,7 @@ struct InstructionMapper {    }  }; -/// \brief An interprocedural pass which finds repeated sequences of +/// An interprocedural pass which finds repeated sequences of  /// instructions and replaces them with calls to functions.  ///  /// Each instruction is mapped to an unsigned integer and placed in a string. @@ -770,10 +684,19 @@ struct MachineOutliner : public ModulePass {    static char ID; -  /// \brief Set to true if the outliner should consider functions with +  /// Set to true if the outliner should consider functions with    /// linkonceodr linkage.    bool OutlineFromLinkOnceODRs = false; +  /// Set to true if the outliner should run on all functions in the module +  /// considered safe for outlining. +  /// Set to true by default for compatibility with llc's -run-pass option. +  /// Set when the pass is constructed in TargetPassConfig. +  bool RunOnAllFunctions = true; + +  // Collection of IR functions created by the outliner. +  std::vector<Function *> CreatedIRFunctions; +    StringRef getPassName() const override { return "Machine Outliner"; }    void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -783,27 +706,35 @@ struct MachineOutliner : public ModulePass {      ModulePass::getAnalysisUsage(AU);    } -  MachineOutliner(bool OutlineFromLinkOnceODRs = false) -      : ModulePass(ID), OutlineFromLinkOnceODRs(OutlineFromLinkOnceODRs) { +  MachineOutliner() : ModulePass(ID) {      initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());    } +  /// Remark output explaining that not outlining a set of candidates would be +  /// better than outlining that set. +  void emitNotOutliningCheaperRemark( +      unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq, +      OutlinedFunction &OF); + +  /// Remark output explaining that a function was outlined. +  void emitOutlinedFunctionRemark(OutlinedFunction &OF); +    /// Find all repeated substrings that satisfy the outlining cost model.    
///    /// If a substring appears at least twice, then it must be represented by -  /// an internal node which appears in at least two suffixes. Each suffix is -  /// represented by a leaf node. To do this, we visit each internal node in -  /// the tree, using the leaf children of each internal node. If an internal -  /// node represents a beneficial substring, then we use each of its leaf -  /// children to find the locations of its substring. +  /// an internal node which appears in at least two suffixes. Each suffix +  /// is represented by a leaf node. To do this, we visit each internal node +  /// in the tree, using the leaf children of each internal node. If an +  /// internal node represents a beneficial substring, then we use each of +  /// its leaf children to find the locations of its substring.    ///    /// \param ST A suffix tree to query.    /// \param TII TargetInstrInfo for the target.    /// \param Mapper Contains outlining mapping information.    /// \param[out] CandidateList Filled with candidates representing each    /// beneficial substring. -  /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each -  /// type of candidate. +  /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions +  /// each type of candidate.    ///    /// \returns The length of the longest candidate found.    unsigned @@ -812,7 +743,7 @@ struct MachineOutliner : public ModulePass {                   std::vector<std::shared_ptr<Candidate>> &CandidateList,                   std::vector<OutlinedFunction> &FunctionList); -  /// \brief Replace the sequences of instructions represented by the +  /// Replace the sequences of instructions represented by the    /// \p Candidates in \p CandidateList with calls to \p MachineFunctions    /// described in \p FunctionList.    /// @@ -852,7 +783,7 @@ struct MachineOutliner : public ModulePass {    /// Removes \p C from the candidate list, and updates its \p OutlinedFunction.    void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList); -  /// \brief Remove any overlapping candidates that weren't handled by the +  /// Remove any overlapping candidates that weren't handled by the    /// suffix tree's pruning method.    ///    /// Pruning from the suffix tree doesn't necessarily remove all overlaps. @@ -873,6 +804,16 @@ struct MachineOutliner : public ModulePass {    /// Construct a suffix tree on the instructions in \p M and outline repeated    /// strings from that tree.    bool runOnModule(Module &M) override; + +  /// Return a DISubprogram for OF if one exists, and null otherwise. Helper +  /// function for remark emission. +  DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) { +    DISubprogram *SP; +    for (const std::shared_ptr<Candidate> &C : OF.Candidates) +      if (C && C->getMF() && (SP = C->getMF()->getFunction().getSubprogram())) +        return SP; +    return nullptr; +  }  };  } // Anonymous namespace. 
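To make the cost model behind getOutliningCost(), getNotOutlinedCost(), and getBenefit() concrete, here is a worked example with hypothetical numbers (real overheads come from the target's getOutliningCandidateInfo): a repeated sequence of 10 instructions occurring 3 times, with a per-call overhead of 1 instruction and a frame overhead of 2, costs 3*1 + 10 + 2 = 15 instructions when outlined versus 3*10 = 30 when left inline, a benefit of 15, so it is outlined. By contrast, a 2-instruction sequence occurring twice under the same overheads costs 2*1 + 2 + 2 = 6 outlined versus only 4 inline, so the benefit clamps to 0 and emitNotOutliningCheaperRemark fires instead.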
@@ -880,8 +821,10 @@ struct MachineOutliner : public ModulePass {  char MachineOutliner::ID = 0;  namespace llvm { -ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs) { -  return new MachineOutliner(OutlineFromLinkOnceODRs); +ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) { +  MachineOutliner *OL = new MachineOutliner(); +  OL->RunOnAllFunctions = RunOnAllFunctions; +  return OL;  }  } // namespace llvm @@ -889,6 +832,65 @@ ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs) {  INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,                  false) +void MachineOutliner::emitNotOutliningCheaperRemark( +    unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq, +    OutlinedFunction &OF) { +  Candidate &C = CandidatesForRepeatedSeq.front(); +  MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr); +  MORE.emit([&]() { +    MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper", +                                      C.front()->getDebugLoc(), C.getMBB()); +    R << "Did not outline " << NV("Length", StringLen) << " instructions" +      << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size()) +      << " locations." +      << " Bytes from outlining all occurrences (" +      << NV("OutliningCost", OF.getOutliningCost()) << ")" +      << " >= Unoutlined instruction bytes (" +      << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")" +      << " (Also found at: "; + +    // Tell the user the other places the candidate was found. +    for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) { +      R << NV((Twine("OtherStartLoc") + Twine(i)).str(), +              CandidatesForRepeatedSeq[i].front()->getDebugLoc()); +      if (i != e - 1) +        R << ", "; +    } + +    R << ")"; +    return R; +  }); +} + +void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) { +  MachineBasicBlock *MBB = &*OF.MF->begin(); +  MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr); +  MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction", +                              MBB->findDebugLoc(MBB->begin()), MBB); +  R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by " +    << "outlining " << NV("Length", OF.Sequence.size()) << " instructions " +    << "from " << NV("NumOccurrences", OF.getOccurrenceCount()) +    << " locations. " +    << "(Found at: "; + +  // Tell the user the other places the candidate was found. +  for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) { + +    // Skip over things that were pruned. +    if (!OF.Candidates[i]->InCandidateList) +      continue; + +    R << NV((Twine("StartLoc") + Twine(i)).str(), +            OF.Candidates[i]->front()->getDebugLoc()); +    if (i != e - 1) +      R << ", "; +  } + +  R << ")"; + +  MORE.emit(R); +} +  unsigned MachineOutliner::findCandidates(      SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper,      std::vector<std::shared_ptr<Candidate>> &CandidateList, @@ -923,14 +925,6 @@ unsigned MachineOutliner::findCandidates(      // this vector.      std::vector<Candidate> CandidatesForRepeatedSeq; -    // Describes the start and end point of each candidate. This allows the -    // target to infer some information about each occurrence of each repeated -    // sequence. -    // FIXME: CandidatesForRepeatedSeq and this should be combined. 
-    std::vector< -        std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>> -        RepeatedSequenceLocs; -      // Figure out the call overhead for each instance of the sequence.      for (auto &ChildPair : Parent.Children) {        SuffixTreeNode *M = ChildPair.second; @@ -966,17 +960,18 @@ unsigned MachineOutliner::findCandidates(                          CandidatesForRepeatedSeq.end(),                          [&StartIdx, &EndIdx](const Candidate &C) {                            return (EndIdx < C.getStartIdx() || -                                  StartIdx > C.getEndIdx());  +                                  StartIdx > C.getEndIdx());                          })) {            // It doesn't overlap with anything, so we can outline it.            // Each sequence is over [StartIt, EndIt]. +          // Save the candidate and its location. +            MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];            MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; -          // Save the candidate and its location. -          CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, +          CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt, +                                                EndIt, StartIt->getParent(),                                                  FunctionList.size()); -          RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt));          }        }      } @@ -984,69 +979,33 @@ unsigned MachineOutliner::findCandidates(      // We've found something we might want to outline.      // Create an OutlinedFunction to store it and check if it'd be beneficial      // to outline. -    TargetInstrInfo::MachineOutlinerInfo MInfo = -        TII.getOutlininingCandidateInfo(RepeatedSequenceLocs); +    OutlinedFunction OF = +        TII.getOutliningCandidateInfo(CandidatesForRepeatedSeq); + +    // If we deleted every candidate, then there's nothing to outline. +    if (OF.Candidates.empty()) +      continue; +      std::vector<unsigned> Seq;      for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)        Seq.push_back(ST.Str[i]); -    OutlinedFunction OF(FunctionList.size(), CandidatesForRepeatedSeq.size(), -                        Seq, MInfo); -    unsigned Benefit = OF.getBenefit(); +    OF.Sequence = Seq; +    OF.Name = FunctionList.size();      // Is it better to outline this candidate than not? -    if (Benefit < 1) { -      // Outlining this candidate would take more instructions than not -      // outlining. -      // Emit a remark explaining why we didn't outline this candidate. -      std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator> C = -          RepeatedSequenceLocs[0]; -      MachineOptimizationRemarkEmitter MORE( -          *(C.first->getParent()->getParent()), nullptr); -      MORE.emit([&]() { -        MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper", -                                          C.first->getDebugLoc(), -                                          C.first->getParent()); -        R << "Did not outline " << NV("Length", StringLen) << " instructions" -          << " from " << NV("NumOccurrences", RepeatedSequenceLocs.size()) -          << " locations." 
-          << " Instructions from outlining all occurrences (" -          << NV("OutliningCost", OF.getOutliningCost()) << ")" -          << " >= Unoutlined instruction count (" -          << NV("NotOutliningCost", StringLen * OF.getOccurrenceCount()) << ")" -          << " (Also found at: "; - -        // Tell the user the other places the candidate was found. -        for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e; i++) { -          R << NV((Twine("OtherStartLoc") + Twine(i)).str(), -                  RepeatedSequenceLocs[i].first->getDebugLoc()); -          if (i != e - 1) -            R << ", "; -        } - -        R << ")"; -        return R; -      }); - -      // Move to the next candidate. +    if (OF.getBenefit() < 1) { +      emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF);        continue;      }      if (StringLen > MaxLen)        MaxLen = StringLen; -    // At this point, the candidate class is seen as beneficial. Set their -    // benefit values and save them in the candidate list. -    std::vector<std::shared_ptr<Candidate>> CandidatesForFn; -    for (Candidate &C : CandidatesForRepeatedSeq) { -      C.Benefit = Benefit; -      C.MInfo = MInfo; -      std::shared_ptr<Candidate> Cptr = std::make_shared<Candidate>(C); -      CandidateList.push_back(Cptr); -      CandidatesForFn.push_back(Cptr); -    } - +    // The function is beneficial. Save its candidates to the candidate list +    // for pruning. +    for (std::shared_ptr<Candidate> &C : OF.Candidates) +      CandidateList.push_back(C);      FunctionList.push_back(OF); -    FunctionList.back().Candidates = CandidatesForFn;      // Move to the next function.      Parent.IsInTree = false; @@ -1067,11 +1026,11 @@ void MachineOutliner::prune(Candidate &C,    // Remove C from the CandidateList.    C.InCandidateList = false; -  DEBUG(dbgs() << "- Removed a Candidate \n"; -        dbgs() << "--- Num fns left for candidate: " << F.getOccurrenceCount() -               << "\n"; -        dbgs() << "--- Candidate's functions's benefit: " << F.getBenefit() -               << "\n";); +  LLVM_DEBUG(dbgs() << "- Removed a Candidate \n"; +             dbgs() << "--- Num fns left for candidate: " +                    << F.getOccurrenceCount() << "\n"; +             dbgs() << "--- Candidate's function's benefit: " << F.getBenefit() +                    << "\n";);  }  void MachineOutliner::pruneOverlaps( @@ -1119,7 +1078,7 @@ void MachineOutliner::pruneOverlaps(      if (C1.getStartIdx() > MaxCandidateLen)        FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen; -    // Compare against the candidates in the list that start at at most +    // Compare against the candidates in the list that start at most      // FarthestPossibleIdx indices away from C1. There are at most      // MaxCandidateLen of these.      for (auto Sit = It + 1; Sit != Et; Sit++) { @@ -1205,9 +1164,20 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,    // NOTE: If this is linkonceodr, then we can take advantage of linker deduping    // which gives us better results when we outline from linkonceodr functions. -  F->setLinkage(GlobalValue::PrivateLinkage); +  F->setLinkage(GlobalValue::InternalLinkage);    F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); +  // FIXME: Set nounwind, so we don't generate eh_frame? Haven't verified it's +  // necessary. + +  // Set optsize/minsize, so we don't insert padding between outlined +  // functions.
+  F->addFnAttr(Attribute::OptimizeForSize); +  F->addFnAttr(Attribute::MinSize); + +  // Save F so that we can add debug info later if we need to. +  CreatedIRFunctions.push_back(F); +    BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);    IRBuilder<> Builder(EntryBB);    Builder.CreateRetVoid(); @@ -1221,8 +1191,6 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,    // Insert the new function into the module.    MF.insert(MF.begin(), &MBB); -  TII.insertOutlinerPrologue(MBB, MF, OF.MInfo); -    // Copy over the instructions for the function using the integer mappings in    // its sequence.    for (unsigned Str : OF.Sequence) { @@ -1231,13 +1199,53 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,      NewMI->dropMemRefs();      // Don't keep debug information for outlined instructions. -    // FIXME: This means outlined functions are currently undebuggable.      NewMI->setDebugLoc(DebugLoc());      MBB.insert(MBB.end(), NewMI);    } -  TII.insertOutlinerEpilogue(MBB, MF, OF.MInfo); +  TII.buildOutlinedFrame(MBB, MF, OF); + +  // If there's a DISubprogram associated with this outlined function, then +  // emit debug info for the outlined function. +  if (DISubprogram *SP = getSubprogramOrNull(OF)) { +    // We have a DISubprogram. Get its DICompileUnit. +    DICompileUnit *CU = SP->getUnit(); +    DIBuilder DB(M, true, CU); +    DIFile *Unit = SP->getFile(); +    Mangler Mg; + +    // Walk over each IR function we created in the outliner and create +    // DISubprograms for each function. +    for (Function *F : CreatedIRFunctions) { +      // Get the mangled name of the function for the linkage name. +      std::string Dummy; +      llvm::raw_string_ostream MangledNameStream(Dummy); +      Mg.getNameWithPrefix(MangledNameStream, F, false); + +      DISubprogram *SP = DB.createFunction( +          Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()), +          Unit /* File */, +          0 /* Line 0 is reserved for compiler-generated code. */, +          DB.createSubroutineType( +              DB.getOrCreateTypeArray(None)), /* void type */ +          false, true, 0, /* Line 0 is reserved for compiler-generated code. */ +          DINode::DIFlags::FlagArtificial /* Compiler-generated code. */, +          true /* Outlined code is optimized code by definition. */); + +      // Don't add any new variables to the subprogram. +      DB.finalizeSubprogram(SP); + +      // Attach subprogram to the function. +      F->setSubprogram(SP); +    } + +    // We're done with the DIBuilder. +    DB.finalize(); +  } +  // Outlined functions shouldn't preserve liveness. +  MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); +  MF.getRegInfo().freezeReservedRegs(MF);    return &MF;  } @@ -1260,79 +1268,73 @@ bool MachineOutliner::outline(      if (OF.getBenefit() < 1)        continue; -    // If not, then outline it. -    assert(C.getStartIdx() < Mapper.InstrList.size() && -           "Candidate out of bounds!"); -    MachineBasicBlock *MBB = (*Mapper.InstrList[C.getStartIdx()]).getParent(); -    MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.getStartIdx()]; -    unsigned EndIdx = C.getEndIdx(); - -    assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!"); -    MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; -    assert(EndIt != MBB->end() && "EndIt out of bounds!"); - -    EndIt++; // Erase needs one past the end index. 
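The removed "EndIt++" above and the std::next calls added later in outline() are two spellings of the same conversion: candidates are stored as closed ranges [StartIt, EndIt], while container erase takes a half-open range. A tiny standalone illustration with a std::list of ints (the values are arbitrary):

    #include <iterator>
    #include <list>
    #include <cstdio>

    int main() {
      std::list<int> Seq{1, 2, 3, 4, 5};
      auto StartIt = std::next(Seq.begin());           // points at 2
      auto EndIt = std::next(Seq.begin(), 3);          // points at 4 (inclusive end)
      // Keep the element at StartIt, erase everything after it through EndIt,
      // mirroring MBB.erase(std::next(StartIt), std::next(EndIt)).
      Seq.erase(std::next(StartIt), std::next(EndIt)); // erases 3 and 4 only
      for (int V : Seq)
        std::printf("%d ", V);                         // prints: 1 2 5
    }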
-      // Does this candidate have a function yet?      if (!OF.MF) {        OF.MF = createOutlinedFunction(M, OF, Mapper); -      MachineBasicBlock *MBB = &*OF.MF->begin(); - -      // Output a remark telling the user that an outlined function was created, -      // and explaining where it came from. -      MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr); -      MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction", -                                  MBB->findDebugLoc(MBB->begin()), MBB); -      R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) -        << " instructions by " -        << "outlining " << NV("Length", OF.Sequence.size()) << " instructions " -        << "from " << NV("NumOccurrences", OF.getOccurrenceCount()) -        << " locations. " -        << "(Found at: "; - -      // Tell the user the other places the candidate was found. -      for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) { - -        // Skip over things that were pruned. -        if (!OF.Candidates[i]->InCandidateList) -          continue; - -        R << NV( -            (Twine("StartLoc") + Twine(i)).str(), -            Mapper.InstrList[OF.Candidates[i]->getStartIdx()]->getDebugLoc()); -        if (i != e - 1) -          R << ", "; -      } - -      R << ")"; - -      MORE.emit(R); +      emitOutlinedFunctionRemark(OF);        FunctionsCreated++;      }      MachineFunction *MF = OF.MF; +    MachineBasicBlock &MBB = *C.getMBB(); +    MachineBasicBlock::iterator StartIt = C.front(); +    MachineBasicBlock::iterator EndIt = C.back(); +    assert(StartIt != C.getMBB()->end() && "StartIt out of bounds!"); +    assert(EndIt != C.getMBB()->end() && "EndIt out of bounds!"); +      const TargetSubtargetInfo &STI = MF->getSubtarget();      const TargetInstrInfo &TII = *STI.getInstrInfo();      // Insert a call to the new function and erase the old sequence. -    TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.MInfo); -    StartIt = Mapper.InstrList[C.getStartIdx()]; -    MBB->erase(StartIt, EndIt); +    auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C); + +    // If the caller tracks liveness, then we need to make sure that anything +    // we outline doesn't break liveness assumptions. +    // The outlined functions themselves currently don't track liveness, but +    // we should make sure that the ranges we yank things out of aren't +    // wrong. +    if (MBB.getParent()->getProperties().hasProperty( +            MachineFunctionProperties::Property::TracksLiveness)) { +      // Helper lambda for adding implicit def operands to the call instruction. +      auto CopyDefs = [&CallInst](MachineInstr &MI) { +        for (MachineOperand &MOP : MI.operands()) { +          // Skip over anything that isn't a register. +          if (!MOP.isReg()) +            continue; + +          // If it's a def, add it to the call instruction. +          if (MOP.isDef()) +            CallInst->addOperand( +                MachineOperand::CreateReg(MOP.getReg(), true, /* isDef = true */ +                                          true /* isImp = true */)); +        } +      }; + +      // Copy over the defs in the outlined range. +      // First inst in outlined range <-- Anything that's defined in this +      // ...                           .. range has to be added as an implicit +      // Last inst in outlined range  <-- def to the call instruction. 
+      std::for_each(CallInst, std::next(EndIt), CopyDefs); +    } +    // Erase from the point after where the call was inserted up to, and +    // including, the final instruction in the sequence. +    // Erase needs one past the end, so we need std::next there too. +    MBB.erase(std::next(StartIt), std::next(EndIt));      OutlinedSomething = true;      // Statistics.      NumOutlined++;    } -  DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";); +  LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";);    return OutlinedSomething;  }  bool MachineOutliner::runOnModule(Module &M) { - -  // Is there anything in the module at all? +  // Check if there's anything in the module. If it's empty, then there's +  // nothing to outline.    if (M.empty())      return false; @@ -1342,25 +1344,67 @@ bool MachineOutliner::runOnModule(Module &M) {    const TargetRegisterInfo *TRI = STI.getRegisterInfo();    const TargetInstrInfo *TII = STI.getInstrInfo(); +  // If the user passed -enable-machine-outliner=always or +  // -enable-machine-outliner, the pass will run on all functions in the module. +  // Otherwise, if the target supports default outlining, it will run on all +  // functions deemed by the target to be worth outlining from by default. Tell +  // the user how the outliner is running. +  LLVM_DEBUG( +    dbgs() << "Machine Outliner: Running on "; +    if (RunOnAllFunctions) +      dbgs() << "all functions"; +    else +      dbgs() << "target-default functions"; +    dbgs() << "\n" +  ); + +  // If the user specifies that they want to outline from linkonceodrs, set +  // it here. +  OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining; +    InstructionMapper Mapper; -  // Build instruction mappings for each function in the module. +  // Build instruction mappings for each function in the module. Start by +  // iterating over each Function in M.    for (Function &F : M) { -    MachineFunction &MF = MMI.getOrCreateMachineFunction(F); -    // Is the function empty? Safe to outline from? -    if (F.empty() || -        !TII->isFunctionSafeToOutlineFrom(MF, OutlineFromLinkOnceODRs)) +    // If there's nothing in F, then there's no reason to try and outline from +    // it. +    if (F.empty()) +      continue; + +    // There's something in F. Check if it has a MachineFunction associated with +    // it. +    MachineFunction *MF = MMI.getMachineFunction(F); + +    // If it doesn't, then there's nothing to outline from. Move to the next +    // Function. +    if (!MF) +      continue; + +    if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))        continue; -    // If it is, look at each MachineBasicBlock in the function. -    for (MachineBasicBlock &MBB : MF) { +    // We have a MachineFunction. Ask the target if it's suitable for outlining. +    // If it isn't, then move on to the next Function in the module. +    if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs)) +      continue; -      // Is there anything in MBB? +    // We have a function suitable for outlining. Iterate over every +    // MachineBasicBlock in MF and try to map its instructions to a list of +    // unsigned integers. +    for (MachineBasicBlock &MBB : *MF) { +      // If there isn't anything in MBB, then there's no point in outlining from +      // it.        if (MBB.empty())          continue; -      // If yes, map it. +      // Check if MBB could be the target of an indirect branch. If it is, then +      // we don't want to outline from it. 
+      if (MBB.hasAddressTaken()) +        continue; + +      // MBB is suitable for outlining. Map it to a list of unsigneds.        Mapper.convertToUnsignedVec(MBB, *TRI, *TII);      }    } @@ -1378,5 +1422,7 @@ bool MachineOutliner::runOnModule(Module &M) {    pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII);    // Outline each of the candidates and return true if something was outlined. -  return outline(M, CandidateList, FunctionList, Mapper); +  bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper); + +  return OutlinedSomething;  } diff --git a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp index 18cb9af499a6..9bb00aaef86d 100644 --- a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -10,14 +10,14 @@  // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.  //  // Software pipelining (SWP) is an instruction scheduling technique for loops -// that overlap loop iterations and explioits ILP via a compiler transformation. +// that overlaps loop iterations and exploits ILP via a compiler transformation.  //  // Swing Modulo Scheduling is an implementation of software pipelining  // that generates schedules that are near optimal in terms of initiation  // interval, register requirements, and stage count. See the papers:  //  // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa, -// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Processings of the 1996 +// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996  // Conference on Parallel Architectures and Compilation Techniques.  //  // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J. @@ -93,6 +93,7 @@  #include "llvm/CodeGen/TargetOpcodes.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/Attributes.h"  #include "llvm/IR/DebugLoc.h"  #include "llvm/IR/Function.h" @@ -125,6 +126,7 @@ using namespace llvm;  STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");  STATISTIC(NumPipelined, "Number of loops software pipelined"); +STATISTIC(NumNodeOrderIssues, "Number of node order issues found");  /// A command line option to turn software pipelining on or off.  static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), @@ -138,7 +140,7 @@ static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",  /// A command line argument to limit minimum initial interval for pipelining.  static cl::opt<int> SwpMaxMii("pipeliner-max-mii", -                              cl::desc("Size limit for the the MII."), +                              cl::desc("Size limit for the MII."),                                cl::Hidden, cl::init(27));  /// A command line argument to limit the number of stages in the pipeline. @@ -217,6 +219,7 @@ public:    }  private: +  void preprocessPhiNodes(MachineBasicBlock &B);    bool canPipelineLoop(MachineLoop &L);    bool scheduleLoop(MachineLoop &L);    bool swingModuloScheduler(MachineLoop &L); @@ -241,6 +244,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {    struct NodeInfo {      int ASAP = 0;      int ALAP = 0; +    int ZeroLatencyDepth = 0; +    int ZeroLatencyHeight = 0;      NodeInfo() = default;    }; @@ -313,15 +318,27 @@ public:    /// Return the latest time an instruction may be scheduled.
int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; } -  /// The mobility function, which the the number of slots in which +  /// The mobility function, which is the number of slots in which    /// an instruction may be scheduled.    int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }    /// The depth, in the dependence graph, for a node. -  int getDepth(SUnit *Node) { return Node->getDepth(); } +  unsigned getDepth(SUnit *Node) { return Node->getDepth(); } + +  /// The maximum unweighted length of a path from an arbitrary node to the +  /// given node in which each edge has latency 0 +  int getZeroLatencyDepth(SUnit *Node) { +    return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth; +  }    /// The height, in the dependence graph, for a node. -  int getHeight(SUnit *Node) { return Node->getHeight(); } +  unsigned getHeight(SUnit *Node) { return Node->getHeight(); } + +  /// The maximum unweighted length of a path from the given node to an +  /// arbitrary node in which each edge has latency 0 +  int getZeroLatencyHeight(SUnit *Node) { +    return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight; +  }    /// Return true if the dependence is a back-edge in the data dependence graph.    /// Since the DAG doesn't contain cycles, we represent a cycle in the graph @@ -332,29 +349,7 @@ public:      return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();    } -  /// Return true if the dependence is an order dependence between non-Phis. -  static bool isOrder(SUnit *Source, const SDep &Dep) { -    if (Dep.getKind() != SDep::Order) -      return false; -    return (!Source->getInstr()->isPHI() && -            !Dep.getSUnit()->getInstr()->isPHI()); -  } - -  bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true); - -  /// The latency of the dependence. -  unsigned getLatency(SUnit *Source, const SDep &Dep) { -    // Anti dependences represent recurrences, so use the latency of the -    // instruction on the back-edge. -    if (Dep.getKind() == SDep::Anti) { -      if (Source->getInstr()->isPHI()) -        return Dep.getSUnit()->Latency; -      if (Dep.getSUnit()->getInstr()->isPHI()) -        return Source->Latency; -      return Dep.getLatency(); -    } -    return Dep.getLatency(); -  } +  bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);    /// The distance function, which indicates that operation V of iteration I    /// depends on operations U of iteration I-distance.
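As a worked reading of the metrics above, with hypothetical values: a node with ASAP = 2 and ALAP = 5 has mobility MOV = 5 - 2 = 3, giving the scheduler three cycles of slack when placing it. The new zero-latency metrics count only latency-0 edges: in a chain A -> B -> C where both edges have latency 0, C has a ZeroLatencyDepth of 2 and A has a ZeroLatencyHeight of 2 even though the chain adds nothing to ASAP or ALAP, so these metrics give the node-ordering heuristics a tie-breaker that the latency-weighted depth and height cannot provide.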
@@ -404,6 +399,7 @@ private:    void addConnectedNodes(SUnit *SU, NodeSet &NewSet,                           SetVector<SUnit *> &NodesAdded);    void computeNodeOrder(NodeSetType &NodeSets); +  void checkValidNodeOrder(const NodeSetType &Circuits) const;    bool schedulePipeline(SMSchedule &Schedule);    void generatePipelinedLoop(SMSchedule &Schedule);    void generateProlog(SMSchedule &Schedule, unsigned LastStage, @@ -438,7 +434,7 @@ private:                                      unsigned InstStageNum,                                      SMSchedule &Schedule);    void updateInstruction(MachineInstr *NewMI, bool LastDef, -                         unsigned CurStageNum, unsigned InstStageNum, +                         unsigned CurStageNum, unsigned InstrStageNum,                           SMSchedule &Schedule, ValueMapTy *VRMap);    MachineInstr *findDefInLoop(unsigned Reg);    unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal, @@ -465,15 +461,22 @@ class NodeSet {    bool HasRecurrence = false;    unsigned RecMII = 0;    int MaxMOV = 0; -  int MaxDepth = 0; +  unsigned MaxDepth = 0;    unsigned Colocate = 0;    SUnit *ExceedPressure = nullptr; +  unsigned Latency = 0;  public:    using iterator = SetVector<SUnit *>::const_iterator;    NodeSet() = default; -  NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {} +  NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) { +    Latency = 0; +    for (unsigned i = 0, e = Nodes.size(); i < e; ++i) +      for (const SDep &Succ : Nodes[i]->Succs) +        if (Nodes.count(Succ.getSUnit())) +          Latency += Succ.getLatency(); +  }    bool insert(SUnit *SU) { return Nodes.insert(SU); } @@ -513,6 +516,10 @@ public:      }    } +  unsigned getLatency() { return Latency; } + +  unsigned getMaxDepth() { return MaxDepth; } +    void clear() {      Nodes.clear();      RecMII = 0; @@ -563,7 +570,7 @@ public:  #endif  }; -/// This class repesents the scheduled code.  The main data structure is a +/// This class represents the scheduled code.  The main data structure is a  /// map from scheduled cycle to instructions.  During scheduling, the  /// data structure explicitly represents all stages/iterations.   When  /// the algorithm finshes, the schedule is collapsed into a single stage, @@ -700,10 +707,10 @@ public:    bool isValidSchedule(SwingSchedulerDAG *SSD);    void finalizeSchedule(SwingSchedulerDAG *SSD); -  bool orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, +  void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,                         std::deque<SUnit *> &Insts);    bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi); -  bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst, +  bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,                               MachineOperand &MO);    void print(raw_ostream &os) const;    void dump() const; @@ -804,20 +811,41 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {    if (!L.getLoopPreheader())      return false; -  // If any of the Phis contain subregs, then we can't pipeline -  // because we don't know how to maintain subreg information in the -  // VMap structure. 
-  MachineBasicBlock *MBB = L.getHeader(); -  for (MachineBasicBlock::iterator BBI = MBB->instr_begin(), -                                   BBE = MBB->getFirstNonPHI(); -       BBI != BBE; ++BBI) -    for (unsigned i = 1; i != BBI->getNumOperands(); i += 2) -      if (BBI->getOperand(i).getSubReg() != 0) -        return false; - +  // Remove any subregisters from inputs to phi nodes. +  preprocessPhiNodes(*L.getHeader());    return true;  } +void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) { +  MachineRegisterInfo &MRI = MF->getRegInfo(); +  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes(); + +  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) { +    MachineOperand &DefOp = PI.getOperand(0); +    assert(DefOp.getSubReg() == 0); +    auto *RC = MRI.getRegClass(DefOp.getReg()); + +    for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) { +      MachineOperand &RegOp = PI.getOperand(i); +      if (RegOp.getSubReg() == 0) +        continue; + +      // If the operand uses a subregister, replace it with a new register +      // without subregisters, and generate a copy to the new register. +      unsigned NewReg = MRI.createVirtualRegister(RC); +      MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB(); +      MachineBasicBlock::iterator At = PredB.getFirstTerminator(); +      const DebugLoc &DL = PredB.findDebugLoc(At); +      auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg) +                    .addReg(RegOp.getReg(), getRegState(RegOp), +                            RegOp.getSubReg()); +      Slots.insertMachineInstrInMaps(*Copy); +      RegOp.setReg(NewReg); +      RegOp.setSubReg(0); +    } +  } +} +  /// The SMS algorithm consists of the following main steps:  /// 1. Computation and analysis of the dependence graph.  /// 2. Ordering of the nodes (instructions). @@ -858,13 +886,14 @@ void SwingSchedulerDAG::schedule() {    Topo.InitDAGTopologicalSorting();    postprocessDAG();    changeDependences(); -  DEBUG({ +  LLVM_DEBUG({      for (unsigned su = 0, e = SUnits.size(); su != e; ++su)        SUnits[su].dumpAll(this);    });    NodeSetType NodeSets;    findCircuits(NodeSets); +  NodeSetType Circuits = NodeSets;    // Calculate the MII.    unsigned ResMII = calculateResMII(); @@ -877,8 +906,8 @@ void SwingSchedulerDAG::schedule() {      RecMII = 0;    MII = std::max(ResMII, RecMII); -  DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII << ", res=" << ResMII -               << ")\n"); +  LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII +                    << ", res=" << ResMII << ")\n");    // Can't schedule a loop without a valid MII.    
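
The phi preprocessing above replaces the old bail-out: instead of refusing to pipeline loops whose phi inputs carry subregister indices, each such input is rerouted through a full-width COPY in the predecessor block. A toy model of that rewrite (hypothetical stand-in types, not MIR):

    #include <vector>

    // Toy stand-ins for MIR objects; only the fields the rewrite touches.
    struct PhiInput { unsigned Reg; unsigned SubReg; int PredBlock; };
    struct Copy     { unsigned Dst; unsigned Src; unsigned SubReg; int Block; };

    void splitSubregInputs(std::vector<PhiInput> &Inputs,
                           std::vector<Copy> &Copies, unsigned &NextVReg) {
      for (PhiInput &In : Inputs) {
        if (In.SubReg == 0)
          continue;                      // already a full-width register
        unsigned NewReg = NextVReg++;    // fresh virtual register
        // NewReg = COPY In.Reg:In.SubReg, placed in the predecessor block
        Copies.push_back({NewReg, In.Reg, In.SubReg, In.PredBlock});
        In.Reg = NewReg;                 // the phi now reads the copy,
        In.SubReg = 0;                   // with no subregister index
      }
    }
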
if (MII == 0) @@ -896,20 +925,20 @@ void SwingSchedulerDAG::schedule() {    checkNodeSets(NodeSets); -  DEBUG({ +  LLVM_DEBUG({      for (auto &I : NodeSets) {        dbgs() << "  Rec NodeSet ";        I.dump();      }    }); -  std::sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>()); +  std::stable_sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());    groupRemainingNodes(NodeSets);    removeDuplicateNodes(NodeSets); -  DEBUG({ +  LLVM_DEBUG({      for (auto &I : NodeSets) {        dbgs() << "  NodeSet ";        I.dump(); @@ -918,6 +947,9 @@ void SwingSchedulerDAG::schedule() {    computeNodeOrder(NodeSets); +  // check for node order issues +  checkValidNodeOrder(Circuits); +    SMSchedule Schedule(Pass.MF);    Scheduled = schedulePipeline(Schedule); @@ -972,7 +1004,7 @@ static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {    return 0;  } -/// Return the Phi register value that comes the the loop block. +/// Return the Phi register value that comes the loop block.  static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {    for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)      if (Phi.getOperand(i + 1).getMBB() == LoopBB) @@ -1022,6 +1054,13 @@ static void getUnderlyingObjects(MachineInstr *MI,    if (!MM->getValue())      return;    GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL); +  for (Value *V : Objs) { +    if (!isIdentifiedObject(V)) { +      Objs.clear(); +      return; +    } +    Objs.push_back(V); +  }  }  /// Add a chain edge between a load and store if the store can be an @@ -1030,6 +1069,8 @@ static void getUnderlyingObjects(MachineInstr *MI,  /// but that code doesn't create loop carried dependences.  void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {    MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads; +  Value *UnknownValue = +    UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));    for (auto &SU : SUnits) {      MachineInstr &MI = *SU.getInstr();      if (isDependenceBarrier(MI, AA)) @@ -1037,6 +1078,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {      else if (MI.mayLoad()) {        SmallVector<Value *, 4> Objs;        getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); +      if (Objs.empty()) +        Objs.push_back(UnknownValue);        for (auto V : Objs) {          SmallVector<SUnit *, 4> &SUs = PendingLoads[V];          SUs.push_back(&SU); @@ -1044,6 +1087,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {      } else if (MI.mayStore()) {        SmallVector<Value *, 4> Objs;        getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); +      if (Objs.empty()) +        Objs.push_back(UnknownValue);        for (auto V : Objs) {          MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I =              PendingLoads.find(V); @@ -1058,33 +1103,39 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {            // offset, then mark the dependence as loop carried potentially.            
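
The cheap base-register test implemented just below can be read in isolation as: with the same base register, a load at a lower offset than a later store may touch the store's location one iteration later. A minimal sketch (hypothetical MemOp summary, not TII's interface):

    // Hypothetical (base register, immediate offset) summaries for the two
    // memory operations; mirrors the cheap test implemented just below.
    struct MemOp { unsigned BaseReg; long long Offset; };

    bool cheapLoopCarriedCheck(const MemOp &Load, const MemOp &Store) {
      if (Load.BaseReg != Store.BaseReg)
        return true;                     // relation unknown: stay conservative
      return Load.Offset < Store.Offset; // load below the store may be carried
    }
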
unsigned BaseReg1, BaseReg2;            int64_t Offset1, Offset2; -          if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) || -              !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) { -            SU.addPred(SDep(Load, SDep::Barrier)); -            continue;             -          } -          if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) { -            assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && -                   "What happened to the chain edge?"); -            SU.addPred(SDep(Load, SDep::Barrier)); -            continue; +          if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) && +              TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) { +            if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) { +              assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && +                     "What happened to the chain edge?"); +              SDep Dep(Load, SDep::Barrier); +              Dep.setLatency(1); +              SU.addPred(Dep); +              continue; +            }            }            // Second, the more expensive check that uses alias analysis on the            // base registers. If they alias, and the load offset is less than            // the store offset, the mark the dependence as loop carried.            if (!AA) { -            SU.addPred(SDep(Load, SDep::Barrier)); +            SDep Dep(Load, SDep::Barrier); +            Dep.setLatency(1); +            SU.addPred(Dep);              continue;            }            MachineMemOperand *MMO1 = *LdMI.memoperands_begin();            MachineMemOperand *MMO2 = *MI.memoperands_begin();            if (!MMO1->getValue() || !MMO2->getValue()) { -            SU.addPred(SDep(Load, SDep::Barrier)); +            SDep Dep(Load, SDep::Barrier); +            Dep.setLatency(1); +            SU.addPred(Dep);              continue;            }            if (MMO1->getValue() == MMO2->getValue() &&                MMO1->getOffset() <= MMO2->getOffset()) { -            SU.addPred(SDep(Load, SDep::Barrier)); +            SDep Dep(Load, SDep::Barrier); +            Dep.setLatency(1); +            SU.addPred(Dep);              continue;            }            AliasResult AAResult = AA->alias( @@ -1093,8 +1144,11 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {                MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,                               MMO2->getAAInfo())); -          if (AAResult != NoAlias) -            SU.addPred(SDep(Load, SDep::Barrier)); +          if (AAResult != NoAlias) { +            SDep Dep(Load, SDep::Barrier); +            Dep.setLatency(1); +            SU.addPred(Dep); +          }          }        }      } @@ -1136,6 +1190,7 @@ void SwingSchedulerDAG::updatePhiDependences() {            if (SU != nullptr && UseMI->isPHI()) {              if (!MI->isPHI()) {                SDep Dep(SU, SDep::Anti, Reg); +              Dep.setLatency(1);                I.addPred(Dep);              } else {                HasPhiDef = Reg; @@ -1382,7 +1437,7 @@ unsigned SwingSchedulerDAG::calculateResMII() {  /// Iterate over each circuit.  Compute the delay(c) and distance(c)  /// for each circuit. The II needs to satisfy the inequality  /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest -/// II that satistifies the inequality, and the RecMII is the maximum +/// II that satisfies the inequality, and the RecMII is the maximum  /// of those values.  
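
Restated: each circuit c forces delay(c) <= II * distance(c), so the smallest legal II for that circuit is ceil(delay/distance), and RecMII takes the maximum over all circuits. A standalone sketch of that computation (Circuit is a hypothetical summary type, not the pass's NodeSet):

    #include <algorithm>
    #include <vector>

    // Hypothetical circuit summary; Distance is at least 1 for a recurrence.
    struct Circuit { unsigned Delay; unsigned Distance; };

    unsigned recMII(const std::vector<Circuit> &Circuits) {
      unsigned RecMII = 0;
      for (const Circuit &C : Circuits) {
        // Smallest II with Delay - II * Distance <= 0: ceil(Delay / Distance).
        unsigned II = (C.Delay + C.Distance - 1) / C.Distance;
        RecMII = std::max(RecMII, II);
      }
      return RecMII;
    }

For example, a circuit with delay 5 and distance 2 yields ceil(5/2) = 3, so no schedule with II < 3 can satisfy it.
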
unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {    unsigned RecMII = 0; @@ -1391,7 +1446,7 @@ unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {      if (Nodes.empty())        continue; -    unsigned Delay = Nodes.size() - 1; +    unsigned Delay = Nodes.getLatency();      unsigned Distance = 1;      // ii = ceil(delay / distance) @@ -1437,10 +1492,23 @@ static void swapAntiDependences(std::vector<SUnit> &SUnits) {  void SwingSchedulerDAG::Circuits::createAdjacencyStructure(      SwingSchedulerDAG *DAG) {    BitVector Added(SUnits.size()); +  DenseMap<int, int> OutputDeps;    for (int i = 0, e = SUnits.size(); i != e; ++i) {      Added.reset();      // Add any successor to the adjacency matrix and exclude duplicates.      for (auto &SI : SUnits[i].Succs) { +      // Only create a back-edge on the first and last nodes of a dependence +      // chain. This records any chains and adds them later. +      if (SI.getKind() == SDep::Output) { +        int N = SI.getSUnit()->NodeNum; +        int BackEdge = i; +        auto Dep = OutputDeps.find(BackEdge); +        if (Dep != OutputDeps.end()) { +          BackEdge = Dep->second; +          OutputDeps.erase(Dep); +        } +        OutputDeps[N] = BackEdge; +      }        // Do not process a boundary node and a back-edge is processed only        // if it goes to a Phi.        if (SI.getSUnit()->isBoundaryNode() || @@ -1456,7 +1524,7 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(      // adjacency matrix.      for (auto &PI : SUnits[i].Preds) {        if (!SUnits[i].getInstr()->mayStore() || -          !DAG->isLoopCarriedOrder(&SUnits[i], PI, false)) +          !DAG->isLoopCarriedDep(&SUnits[i], PI, false))          continue;        if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {          int N = PI.getSUnit()->NodeNum; @@ -1467,6 +1535,12 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(        }      }    } +  // Add back-eges in the adjacency matrix for the output dependences. +  for (auto &OD : OutputDeps) +    if (!Added.test(OD.second)) { +      AdjK[OD.first].push_back(OD.second); +      Added.set(OD.second); +    }  }  /// Identify an elementary circuit in the dependence graph starting at the @@ -1543,7 +1617,7 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {  }  /// Return true for DAG nodes that we ignore when computing the cost functions. -/// We ignore the back-edge recurrence in order to avoid unbounded recurison +/// We ignore the back-edge recurrence in order to avoid unbounded recursion  /// in the calculation of the ASAP, ALAP, etc functions.  static bool ignoreDependence(const SDep &D, bool isPred) {    if (D.isArtificial()) @@ -1560,7 +1634,7 @@ static bool ignoreDependence(const SDep &D, bool isPred) {  void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {    ScheduleInfo.resize(SUnits.size()); -  DEBUG({ +  LLVM_DEBUG({      for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),                                                      E = Topo.end();           I != E; ++I) { @@ -1570,49 +1644,59 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {    });    int maxASAP = 0; -  // Compute ASAP. +  // Compute ASAP and ZeroLatencyDepth.    
for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),                                                    E = Topo.end();         I != E; ++I) {      int asap = 0; +    int zeroLatencyDepth = 0;      SUnit *SU = &SUnits[*I];      for (SUnit::const_pred_iterator IP = SU->Preds.begin(),                                      EP = SU->Preds.end();           IP != EP; ++IP) { +      SUnit *pred = IP->getSUnit(); +      if (IP->getLatency() == 0) +        zeroLatencyDepth = +            std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);        if (ignoreDependence(*IP, true))          continue; -      SUnit *pred = IP->getSUnit(); -      asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) - +      asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -                                    getDistance(pred, SU, *IP) * MII));      }      maxASAP = std::max(maxASAP, asap);      ScheduleInfo[*I].ASAP = asap; +    ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;    } -  // Compute ALAP and MOV. +  // Compute ALAP, ZeroLatencyHeight, and MOV.    for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(),                                                            E = Topo.rend();         I != E; ++I) {      int alap = maxASAP; +    int zeroLatencyHeight = 0;      SUnit *SU = &SUnits[*I];      for (SUnit::const_succ_iterator IS = SU->Succs.begin(),                                      ES = SU->Succs.end();           IS != ES; ++IS) { +      SUnit *succ = IS->getSUnit(); +      if (IS->getLatency() == 0) +        zeroLatencyHeight = +            std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);        if (ignoreDependence(*IS, true))          continue; -      SUnit *succ = IS->getSUnit(); -      alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) + +      alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +                                    getDistance(SU, succ, *IS) * MII));      }      ScheduleInfo[*I].ALAP = alap; +    ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;    }    // After computing the node functions, compute the summary for each node set.    
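
The two passes above can be restated per edge. For an edge u -> v with latency L and iteration distance D (D > 0 only on edges that cross the back-edge):

    ASAP(v) = max over predecessors u of:  ASAP(u) + L(u,v) - D(u,v) * MII
    ALAP(u) = min over successors  v of:  ALAP(v) - L(u,v) + D(u,v) * MII

The D * MII term credits edges that cross D iteration boundaries, since iteration i+D begins D * MII cycles after iteration i. For instance, a back-edge with L = 2 and D = 1 at MII = 3 contributes ASAP(u) + 2 - 3, so it constrains v one cycle less than a same-iteration edge of the same latency would.
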
for (NodeSet &I : NodeSets)      I.computeNodeSetInfo(this); -  DEBUG({ +  LLVM_DEBUG({      for (unsigned i = 0; i < SUnits.size(); i++) {        dbgs() << "\tNode " << i << ":\n";        dbgs() << "\t   ASAP = " << getASAP(&SUnits[i]) << "\n"; @@ -1620,6 +1704,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {        dbgs() << "\t   MOV  = " << getMOV(&SUnits[i]) << "\n";        dbgs() << "\t   D    = " << getDepth(&SUnits[i]) << "\n";        dbgs() << "\t   H    = " << getHeight(&SUnits[i]) << "\n"; +      dbgs() << "\t   ZLD  = " << getZeroLatencyDepth(&SUnits[i]) << "\n"; +      dbgs() << "\t   ZLH  = " << getZeroLatencyHeight(&SUnits[i]) << "\n";      }    });  } @@ -1778,7 +1864,8 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {      RecRPTracker.closeBottom();      std::vector<SUnit *> SUnits(NS.begin(), NS.end()); -    std::sort(SUnits.begin(), SUnits.end(), [](const SUnit *A, const SUnit *B) { +    llvm::sort(SUnits.begin(), SUnits.end(), +               [](const SUnit *A, const SUnit *B) {        return A->NodeNum > B->NodeNum;      }); @@ -1796,9 +1883,10 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {                                               CriticalPSets,                                               RecRegPressure.MaxSetPressure);        if (RPDelta.Excess.isValid()) { -        DEBUG(dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") " -                     << TRI->getRegPressureSetName(RPDelta.Excess.getPSet()) -                     << ":" << RPDelta.Excess.getUnitInc()); +        LLVM_DEBUG( +            dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") " +                   << TRI->getRegPressureSetName(RPDelta.Excess.getPSet()) +                   << ":" << RPDelta.Excess.getUnitInc());          NS.setExceedPressure(SU);          break;        } @@ -1834,25 +1922,23 @@ void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {  /// Check if the existing node-sets are profitable. If not, then ignore the  /// recurrent node-sets, and attempt to schedule all nodes together. This is -/// a heuristic. If the MII is large and there is a non-recurrent node with -/// a large depth compared to the MII, then it's best to try and schedule -/// all instruction together instead of starting with the recurrent node-sets. +/// a heuristic. If the MII is large and all the recurrent node-sets are small, +/// then it's best to try to schedule all instructions together instead of +/// starting with the recurrent node-sets.  void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {    // Look for loops with a large MII. -  if (MII <= 20) +  if (MII < 17)      return;    // Check if the node-set contains only a simple add recurrence. -  for (auto &NS : NodeSets) -    if (NS.size() > 2) +  for (auto &NS : NodeSets) { +    if (NS.getRecMII() > 2)        return; -  // If the depth of any instruction is significantly larger than the MII, then -  // ignore the recurrent node-sets and treat all instructions equally. 
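
The rewritten heuristic above, collapsed into one predicate (hypothetical summary arguments rather than NodeSet objects, for illustration only):

    // Drop the recurrent node-sets only when the MII is large and every
    // recurrence is both short and shallow.
    bool ignoreRecurrences(unsigned MII, unsigned MaxRecMII, unsigned MaxDepth) {
      return MII >= 17        // only loops with a large MII
          && MaxRecMII <= 2   // every recurrence is a simple, short one
          && MaxDepth <= MII; // and no recurrence is deeper than the MII
    }
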
-  for (auto &SU : SUnits)
-    if (SU.getDepth() > MII * 1.5) {
-      NodeSets.clear();
-      DEBUG(dbgs() << "Clear recurrence node-sets\n");
+    if (NS.getMaxDepth() > MII)
       return;
-    }
+  }
+  NodeSets.clear();
+  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
+  return;
 }
 
 /// Add the nodes that do not belong to a recurrence set into groups
@@ -1907,7 +1993,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
   if (!NewSet.empty())
     NodeSets.push_back(NewSet);
 
-  // Create new nodes sets with the connected nodes any any remaining node that
+  // Create new nodes sets with the connected nodes any remaining node that
   // has no predecessor.
   for (unsigned i = 0; i < SUnits.size(); ++i) {
     SUnit *SU = &SUnits[i];
@@ -1988,14 +2074,6 @@ void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
     }
 }
 
-/// Return true if Inst1 defines a value that is used in Inst2.
-static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) {
-  for (auto &SI : Inst1->Succs)
-    if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data)
-      return true;
-  return false;
-}
-
 /// Compute an ordered list of the dependence graph nodes, which
 /// indicates the order that the nodes will be scheduled.  This is a
 /// two-level algorithm. First, a partial order is created, which
@@ -2005,59 +2083,62 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
   NodeOrder.clear();
 
   for (auto &Nodes : NodeSets) {
-    DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
+    LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
     OrderKind Order;
     SmallSetVector<SUnit *, 8> N;
     if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
       R.insert(N.begin(), N.end());
       Order = BottomUp;
-      DEBUG(dbgs() << "  Bottom up (preds) ");
+      LLVM_DEBUG(dbgs() << "  Bottom up (preds) ");
     } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
       R.insert(N.begin(), N.end());
       Order = TopDown;
-      DEBUG(dbgs() << "  Top down (succs) ");
+      LLVM_DEBUG(dbgs() << "  Top down (succs) ");
     } else if (isIntersect(N, Nodes, R)) {
       // If some of the successors are in the existing node-set, then use the
       // top-down ordering.
       Order = TopDown;
-      DEBUG(dbgs() << "  Top down (intersect) ");
+      LLVM_DEBUG(dbgs() << "  Top down (intersect) ");
     } else if (NodeSets.size() == 1) {
       for (auto &N : Nodes)
         if (N->Succs.size() == 0)
           R.insert(N);
       Order = BottomUp;
-      DEBUG(dbgs() << "  Bottom up (all) ");
+      LLVM_DEBUG(dbgs() << "  Bottom up (all) ");
     } else {
       // Find the node with the highest ASAP.
       SUnit *maxASAP = nullptr;
       for (SUnit *SU : Nodes) {
-        if (maxASAP == nullptr || getASAP(SU) >= getASAP(maxASAP))
+        if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
+            (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
           maxASAP = SU;
       }
       R.insert(maxASAP);
       Order = BottomUp;
-      DEBUG(dbgs() << "  Bottom up (default) ");
+      LLVM_DEBUG(dbgs() << "  Bottom up (default) ");
     }
 
     while (!R.empty()) {
       if (Order == TopDown) {
         // Choose the node with the maximum height.  If more than one, choose
-        // the node with the lowest MOV. If still more than one, check if there
-        // is a dependence between the instructions.
+        // the node with the maximum ZeroLatencyHeight. 
If still more than one, +        // choose the node with the lowest MOV.          while (!R.empty()) {            SUnit *maxHeight = nullptr;            for (SUnit *I : R) {              if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))                maxHeight = I;              else if (getHeight(I) == getHeight(maxHeight) && -                     getMOV(I) < getMOV(maxHeight) && -                     !hasDataDependence(maxHeight, I)) +                     getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))                maxHeight = I; -            else if (hasDataDependence(I, maxHeight)) +            else if (getHeight(I) == getHeight(maxHeight) && +                     getZeroLatencyHeight(I) == +                         getZeroLatencyHeight(maxHeight) && +                     getMOV(I) < getMOV(maxHeight))                maxHeight = I;            }            NodeOrder.insert(maxHeight); -          DEBUG(dbgs() << maxHeight->NodeNum << " "); +          LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");            R.remove(maxHeight);            for (const auto &I : maxHeight->Succs) {              if (Nodes.count(I.getSUnit()) == 0) @@ -2080,28 +2161,29 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {            }          }          Order = BottomUp; -        DEBUG(dbgs() << "\n   Switching order to bottom up "); +        LLVM_DEBUG(dbgs() << "\n   Switching order to bottom up ");          SmallSetVector<SUnit *, 8> N;          if (pred_L(NodeOrder, N, &Nodes))            R.insert(N.begin(), N.end());        } else {          // Choose the node with the maximum depth.  If more than one, choose -        // the node with the lowest MOV. If there is still more than one, check -        // for a dependence between the instructions. +        // the node with the maximum ZeroLatencyDepth. If still more than one, +        // choose the node with the lowest MOV.          while (!R.empty()) {            SUnit *maxDepth = nullptr;            for (SUnit *I : R) {              if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))                maxDepth = I;              else if (getDepth(I) == getDepth(maxDepth) && -                     getMOV(I) < getMOV(maxDepth) && -                     !hasDataDependence(I, maxDepth)) +                     getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))                maxDepth = I; -            else if (hasDataDependence(maxDepth, I)) +            else if (getDepth(I) == getDepth(maxDepth) && +                     getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) && +                     getMOV(I) < getMOV(maxDepth))                maxDepth = I;            }            NodeOrder.insert(maxDepth); -          DEBUG(dbgs() << maxDepth->NodeNum << " "); +          LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");            R.remove(maxDepth);            if (Nodes.isExceedSU(maxDepth)) {              Order = TopDown; @@ -2114,8 +2196,6 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {                continue;              if (NodeOrder.count(I.getSUnit()) != 0)                continue; -            if (I.getKind() == SDep::Anti) -              continue;              R.insert(I.getSUnit());            }            // Back-edges are predecessors with an anti-dependence. 
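
The top-down pick above can be expressed as a single comparator (a sketch over a hypothetical node summary; the bottom-up walk is symmetric, using depth and ZeroLatencyDepth instead):

    #include <tuple>

    // Hypothetical node summary with the three keys used above.
    struct Node { int Height; int ZeroLatencyHeight; int MOV; };

    // True if A should be picked before B in the top-down walk:
    // maximize height, then zero-latency height; minimize mobility last.
    bool pickFirstTopDown(const Node &A, const Node &B) {
      return std::make_tuple(A.Height, A.ZeroLatencyHeight, -A.MOV) >
             std::make_tuple(B.Height, B.ZeroLatencyHeight, -B.MOV);
    }
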
@@ -2130,16 +2210,16 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {            }          }          Order = TopDown; -        DEBUG(dbgs() << "\n   Switching order to top down "); +        LLVM_DEBUG(dbgs() << "\n   Switching order to top down ");          SmallSetVector<SUnit *, 8> N;          if (succ_L(NodeOrder, N, &Nodes))            R.insert(N.begin(), N.end());        }      } -    DEBUG(dbgs() << "\nDone with Nodeset\n"); +    LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");    } -  DEBUG({ +  LLVM_DEBUG({      dbgs() << "Node order: ";      for (SUnit *I : NodeOrder)        dbgs() << " " << I->NodeNum << " "; @@ -2158,7 +2238,7 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {    for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {      Schedule.reset();      Schedule.setInitiationInterval(II); -    DEBUG(dbgs() << "Try to schedule with " << II << "\n"); +    LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");      SetVector<SUnit *>::iterator NI = NodeOrder.begin();      SetVector<SUnit *>::iterator NE = NodeOrder.end(); @@ -2175,12 +2255,12 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {        int SchedStart = INT_MIN;        Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,                              II, this); -      DEBUG({ +      LLVM_DEBUG({          dbgs() << "Inst (" << SU->NodeNum << ") ";          SU->getInstr()->dump();          dbgs() << "\n";        }); -      DEBUG({ +      LLVM_DEBUG({          dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart                 << " me: " << SchedEnd << " ms: " << SchedStart << "\n";        }); @@ -2216,7 +2296,7 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {              Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)            scheduleFound = false; -      DEBUG({ +      LLVM_DEBUG({          if (!scheduleFound)            dbgs() << "\tCan't schedule\n";        }); @@ -2227,7 +2307,7 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {        scheduleFound = Schedule.isValidSchedule(this);    } -  DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n"); +  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");    if (scheduleFound)      Schedule.finalizeSchedule(this); @@ -2250,7 +2330,7 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {    // Remember the registers that are used in different stages. The index is    // the iteration, or stage, that the instruction is scheduled in.  This is -  // a map between register names in the orignal block and the names created +  // a map between register names in the original block and the names created    // in each stage of the pipelined loop.    ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];    InstrMapTy InstrMap; @@ -2297,7 +2377,7 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {    generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,                 InstrMap, MaxStageCount, MaxStageCount, false); -  DEBUG(dbgs() << "New block\n"; KernelBB->dump();); +  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););    SmallVector<MachineBasicBlock *, 4> EpilogBBs;    // Generate the epilog instructions to complete the pipeline. 
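
The driver loop of schedulePipeline above reduces to a simple skeleton: try successively larger initiation intervals, starting at the MII, and stop at the first II for which every node can be placed (TrySchedule is a hypothetical stand-in for the per-node placement logic):

    #include <functional>

    bool findPipelineSchedule(unsigned MII,
                              const std::function<bool(unsigned)> &TrySchedule) {
      for (unsigned II = MII; II < MII + 10; ++II)  // give up past MII + 9
        if (TrySchedule(II))
          return true;
      return false;
    }

Raising II widens every scheduling window, so the search is monotone in practice; the +10 cap simply bounds compile time.
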
@@ -2315,6 +2395,8 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {    addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);    // Remove the original loop since it's no longer referenced. +  for (auto &I : *BB) +    LIS.RemoveMachineInstrFromMaps(I);    BB->clear();    BB->eraseFromParent(); @@ -2364,7 +2446,7 @@ void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,        }      }      rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap); -    DEBUG({ +    LLVM_DEBUG({        dbgs() << "prolog:\n";        NewBB->dump();      }); @@ -2431,7 +2513,9 @@ void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,            continue;          MachineInstr *In = &BBI;          if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) { -          MachineInstr *NewMI = cloneInstr(In, EpilogStage - LastStage, 0); +          // Instructions with memoperands in the epilog are updated with +          // conservative values. +          MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);            updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);            NewBB->push_back(NewMI);            InstrMap[NewMI] = In; @@ -2444,7 +2528,7 @@ void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,                   InstrMap, LastStage, EpilogStage, i == 1);      PredBB = NewBB; -    DEBUG({ +    LLVM_DEBUG({        dbgs() << "epilog:\n";        NewBB->dump();      }); @@ -2550,24 +2634,20 @@ void SwingSchedulerDAG::generateExistingPhis(        // of the Phi value.        unsigned NewReg = VRMap[PrevStage][LoopVal];        rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI, -                            Def, NewReg); +                            Def, InitVal, NewReg);        if (VRMap[CurStageNum].count(LoopVal))          VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];      }      // Adjust the number of Phis needed depending on the number of prologs left, -    // and the distance from where the Phi is first scheduled. -    unsigned NumPhis = NumStages; -    if (!InKernel && (int)PrologStage < LoopValStage) -      // The NumPhis is the maximum number of new Phis needed during the steady -      // state. If the Phi has not been scheduled in current prolog, then we -      // need to generate less Phis. -      NumPhis = std::max((int)NumPhis - (int)(LoopValStage - PrologStage), 1); -    // The number of Phis cannot exceed the number of prolog stages. Each -    // stage can potentially define two values. -    NumPhis = std::min(NumPhis, PrologStage + 2); +    // and the distance from where the Phi is first scheduled. The number of +    // Phis cannot exceed the number of prolog stages. Each stage can +    // potentially define two values. +    unsigned MaxPhis = PrologStage + 2; +    if (!InKernel && (int)PrologStage <= LoopValStage) +      MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1); +    unsigned NumPhis = std::min(NumStages, MaxPhis);      unsigned NewReg = 0; -      unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;      // In the epilog, we may need to look back one stage to get the correct      // Phi name because the epilog and prolog blocks execute the same stage. @@ -2659,19 +2739,20 @@ void SwingSchedulerDAG::generateExistingPhis(        // references another Phi, and the other Phi is scheduled in an        // earlier stage. We can try to reuse an existing Phi up until the last        // stage of the current Phi. 
-      if (LoopDefIsPhi && (int)PrologStage >= StageScheduled) { +      if (LoopDefIsPhi && (int)(PrologStage - np) >= StageScheduled) {          int LVNumStages = Schedule.getStagesForPhi(LoopVal);          int StageDiff = (StageScheduled - LoopValStage);          LVNumStages -= StageDiff; -        if (LVNumStages > (int)np) { +        // Make sure the loop value Phi has been processed already. +        if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {            NewReg = PhiOp2;            unsigned ReuseStage = CurStageNum;            if (Schedule.isLoopCarried(this, *PhiInst))              ReuseStage -= LVNumStages;            // Check if the Phi to reuse has been generated yet. If not, then            // there is nothing to reuse. -          if (VRMap[ReuseStage].count(LoopVal)) { -            NewReg = VRMap[ReuseStage][LoopVal]; +          if (VRMap[ReuseStage - np].count(LoopVal)) { +            NewReg = VRMap[ReuseStage - np][LoopVal];              rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,                                    &*BBI, Def, NewReg); @@ -2744,7 +2825,7 @@ void SwingSchedulerDAG::generateExistingPhis(  /// Generate Phis for the specified block in the generated pipelined code.  /// These are new Phis needed because the definition is scheduled after the -/// use in the pipelened sequence. +/// use in the pipelined sequence.  void SwingSchedulerDAG::generatePhis(      MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,      MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap, @@ -2874,6 +2955,13 @@ void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,          if (!MOI->isReg() || !MOI->isDef())            continue;          unsigned reg = MOI->getReg(); +        // Assume physical registers are used, unless they are marked dead. +        if (TargetRegisterInfo::isPhysicalRegister(reg)) { +          used = !MOI->isDead(); +          if (used) +            break; +          continue; +        }          unsigned realUses = 0;          for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),                                                 EI = MRI.use_end(); @@ -2891,6 +2979,7 @@ void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,          used = false;        }        if (!used) { +        LIS.RemoveMachineInstrFromMaps(*MI);          MI++->eraseFromParent();          continue;        } @@ -2905,6 +2994,7 @@ void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,      ++BBI;      unsigned reg = MI->getOperand(0).getReg();      if (MRI.use_begin(reg) == MRI.use_end()) { +      LIS.RemoveMachineInstrFromMaps(*MI);        MI->eraseFromParent();      }    } @@ -2924,10 +3014,8 @@ void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,                                         MBBVectorTy &EpilogBBs,                                         SMSchedule &Schedule) {    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); -  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), -                                   BBF = KernelBB->getFirstNonPHI(); -       BBI != BBF; ++BBI) { -    unsigned Def = BBI->getOperand(0).getReg(); +  for (auto &PHI : KernelBB->phis()) { +    unsigned Def = PHI.getOperand(0).getReg();      // Check for any Phi definition that used as an operand of another Phi      // in the same block.      
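
The dead-code sweep above applies one liveness rule per definition: physical registers are assumed used unless explicitly marked dead, and a virtual register is dead only if it has no remaining uses. Schematically (hypothetical Def summary, not MachineOperand):

    #include <vector>

    struct Def { bool IsPhysical; bool MarkedDead; unsigned NumUses; };

    bool instrIsDead(const std::vector<Def> &Defs) {
      for (const Def &D : Defs) {
        if (D.IsPhysical) {
          if (!D.MarkedDead)
            return false;     // assume physregs are used unless marked dead
          continue;
        }
        if (D.NumUses != 0)
          return false;       // a live virtual register keeps the instruction
      }
      return true;            // every def is dead: safe to erase
    }
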
for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def), @@ -2935,7 +3023,7 @@ void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,           I != E; ++I) {        if (I->isPHI() && I->getParent() == KernelBB) {          // Get the loop carried definition. -        unsigned LCDef = getLoopPhiReg(*BBI, KernelBB); +        unsigned LCDef = getLoopPhiReg(PHI, KernelBB);          if (!LCDef)            continue;          MachineInstr *MI = MRI.getVRegDef(LCDef); @@ -3099,12 +3187,14 @@ void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,        continue;      }      unsigned Delta; -    if (computeDelta(OldMI, Delta)) { +    if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {        int64_t AdjOffset = Delta * Num;        NewMemRefs[Refs++] =            MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()); -    } else -      NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, 0, UINT64_MAX); +    } else { +      NewMI.dropMemRefs(); +      return; +    }    }    NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);  } @@ -3249,13 +3339,11 @@ void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,                                           SMSchedule &Schedule,                                           ValueMapTy *VRMap,                                           InstrMapTy &InstrMap) { -  for (MachineBasicBlock::iterator BBI = BB->instr_begin(), -                                   BBE = BB->getFirstNonPHI(); -       BBI != BBE; ++BBI) { +  for (auto &PHI : BB->phis()) {      unsigned InitVal = 0;      unsigned LoopVal = 0; -    getPhiRegs(*BBI, BB, InitVal, LoopVal); -    unsigned PhiDef = BBI->getOperand(0).getReg(); +    getPhiRegs(PHI, BB, InitVal, LoopVal); +    unsigned PhiDef = PHI.getOperand(0).getReg();      unsigned PhiStage =          (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef))); @@ -3269,7 +3357,7 @@ void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,            getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);        if (!NewVal)          NewVal = InitVal; -      rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &*BBI, +      rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,                              PhiDef, NewVal);      }    } @@ -3375,10 +3463,15 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,    if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))      return false; -  // Make sure offset values are both positive or both negative. +  // Make sure that the instructions do not access the same memory location in +  // the next iteration.    int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();    int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm(); -  if ((LoadOffset >= 0) != (StoreOffset >= 0)) +  MachineInstr *NewMI = MF.CloneMachineInstr(MI); +  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset); +  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef); +  MF.DeleteMachineInstr(NewMI); +  if (!Disjoint)      return false;    // Set the return value once we determine that we return true. @@ -3425,17 +3518,21 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,    }  } -/// Return true for an order dependence that is loop carried potentially. -/// An order dependence is loop carried if the destination defines a value -/// that may be used by the source in a subsequent iteration. 
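
One way to picture the pruning added in the rework below: once the shared base register is known to advance by a constant increment each iteration, the next iteration's access range can be compared against the current one, and only a real byte-range overlap forces a loop-carried dependence. A simplified interval test (the pass's actual comparison differs in its details):

    // Simplified model: Dst's base register advances by Increment each
    // iteration; the dependence is loop carried when Dst's next-iteration
    // byte range [DstNext, DstNext + DstSize) overlaps Src's current range.
    struct Access { long long Offset; long long Size; };

    bool overlapsNextIteration(const Access &Src, const Access &Dst,
                               long long Increment) {
      long long DstNext = Dst.Offset + Increment;
      return Src.Offset < DstNext + Dst.Size &&
             DstNext < Src.Offset + Src.Size;
    }
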
-bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
-                                           bool isSucc) {
-  if (!isOrder(Source, Dep) || Dep.isArtificial())
+/// Return true for an order or output dependence that is loop carried
+/// potentially. A dependence is loop carried if the destination defines a value
+/// that may be used or defined by the source in a subsequent iteration.
+bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
+                                         bool isSucc) {
+  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
+      Dep.isArtificial())
     return false;
 
   if (!SwpPruneLoopCarried)
     return true;
 
+  if (Dep.getKind() == SDep::Output)
+    return true;
+
   MachineInstr *SI = Source->getInstr();
   MachineInstr *DI = Dep.getSUnit()->getInstr();
   if (!isSucc)
@@ -3465,6 +3562,19 @@ bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
   if (BaseRegS != BaseRegD)
     return true;
 
+  // Check that the base register is incremented by a constant value for each
+  // iteration.
+  MachineInstr *Def = MRI.getVRegDef(BaseRegS);
+  if (!Def || !Def->isPHI())
+    return true;
+  unsigned InitVal = 0;
+  unsigned LoopVal = 0;
+  getPhiRegs(*Def, BB, InitVal, LoopVal);
+  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
+  int D = 0;
+  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
+    return true;
+
   uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
   uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
@@ -3516,7 +3626,7 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
     }
     if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
         Resources->canReserveResources(*SU->getInstr())) {
-      DEBUG({
+      LLVM_DEBUG({
         dbgs() << "\tinsert at cycle " << curCycle << " ";
         SU->getInstr()->dump();
       });
@@ -3529,7 +3639,7 @@
         FirstCycle = curCycle;
       return true;
     }
-    DEBUG({
+    LLVM_DEBUG({
      dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
      SU->getInstr()->dump();
    });
@@ -3553,7 +3663,7 @@ int SMSchedule::earliestCycleInChain(const SDep &Dep) {
       continue;
     EarlyCycle = std::min(EarlyCycle, it->second);
     for (const auto &PI : PrevSU->Preds)
-      if (SwingSchedulerDAG::isOrder(PrevSU, PI))
+      if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
         Worklist.push_back(PI);
     Visited.insert(PrevSU);
   }
@@ -3576,7 +3686,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) {
       continue;
     LateCycle = std::max(LateCycle, it->second);
     for (const auto &SI : SuccSU->Succs)
-      if (SwingSchedulerDAG::isOrder(SuccSU, SI))
+      if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
         Worklist.push_back(SI);
     Visited.insert(SuccSU);
   }
@@ -3590,7 +3700,7 @@ static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
   for (auto &P : SU->Preds)
     if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
       for (auto &S : P.getSUnit()->Succs)
-        if (S.getKind() == SDep::Order && S.getSUnit()->getInstr()->isPHI())
+        if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
           return P.getSUnit();
   return nullptr;
 }
@@ -3601,7 +3711,7 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, 
                            int *MinEnd, int *MaxStart, int II,                                SwingSchedulerDAG *DAG) {    // Iterate over each instruction that has been scheduled already.  The start -  // slot computuation depends on whether the previously scheduled instruction +  // slot computation depends on whether the previously scheduled instruction    // is a predecessor or successor of the specified instruction.    for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) { @@ -3613,15 +3723,15 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,          const SDep &Dep = SU->Preds[i];          if (Dep.getSUnit() == I) {            if (!DAG->isBackedge(SU, Dep)) { -            int EarlyStart = cycle + DAG->getLatency(SU, Dep) - +            int EarlyStart = cycle + Dep.getLatency() -                               DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;              *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); -            if (DAG->isLoopCarriedOrder(SU, Dep, false)) { +            if (DAG->isLoopCarriedDep(SU, Dep, false)) {                int End = earliestCycleInChain(Dep) + (II - 1);                *MinEnd = std::min(*MinEnd, End);              }            } else { -            int LateStart = cycle - DAG->getLatency(SU, Dep) + +            int LateStart = cycle - Dep.getLatency() +                              DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;              *MinLateStart = std::min(*MinLateStart, LateStart);            } @@ -3633,23 +3743,24 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,              !SU->isPred(I))            *MinLateStart = std::min(*MinLateStart, cycle);        } -      for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) +      for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {          if (SU->Succs[i].getSUnit() == I) {            const SDep &Dep = SU->Succs[i];            if (!DAG->isBackedge(SU, Dep)) { -            int LateStart = cycle - DAG->getLatency(SU, Dep) + +            int LateStart = cycle - Dep.getLatency() +                              DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;              *MinLateStart = std::min(*MinLateStart, LateStart); -            if (DAG->isLoopCarriedOrder(SU, Dep)) { +            if (DAG->isLoopCarriedDep(SU, Dep)) {                int Start = latestCycleInChain(Dep) + 1 - II;                *MaxStart = std::max(*MaxStart, Start);              }            } else { -            int EarlyStart = cycle + DAG->getLatency(SU, Dep) - +            int EarlyStart = cycle + Dep.getLatency() -                               DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;              *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);            }          } +      }      }    }  } @@ -3657,7 +3768,7 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,  /// Order the instructions within a cycle so that the definitions occur  /// before the uses. Returns true if the instruction is added to the start  /// of the list, or false if added to the end. 
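
The window arithmetic in computeStart above, isolated for a single already-scheduled neighbor (a sketch; Window is a hypothetical pair of the two bounds being tightened):

    #include <algorithm>

    struct Window { int MaxEarlyStart; int MinLateStart; };

    // One already-scheduled neighbor at Cycle, connected by an edge with
    // Latency and iteration distance Distance, tightens the window:
    void applyEdge(Window &W, int Cycle, int Latency, int Distance, int II,
                   bool NeighborIsPred) {
      if (NeighborIsPred)   // earliest start: after the producer finishes
        W.MaxEarlyStart = std::max(W.MaxEarlyStart,
                                   Cycle + Latency - Distance * II);
      else                  // latest start: before the consumer needs it
        W.MinLateStart = std::min(W.MinLateStart,
                                  Cycle - Latency + Distance * II);
    }

As with ASAP/ALAP, the Distance * II term relaxes edges that cross iteration boundaries, which is what lets the kernel place a consumer "before" its producer within one II-cycle window.
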
-bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, +void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,                                   std::deque<SUnit *> &Insts) {    MachineInstr *MI = SU->getInstr();    bool OrderBeforeUse = false; @@ -3670,13 +3781,11 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,    unsigned Pos = 0;    for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;         ++I, ++Pos) { -    // Relative order of Phis does not matter. -    if (MI->isPHI() && (*I)->getInstr()->isPHI()) -      continue;      for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {        MachineOperand &MO = MI->getOperand(i);        if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))          continue; +        unsigned Reg = MO.getReg();        unsigned BasePos, OffsetPos;        if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) @@ -3688,7 +3797,8 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,            (*I)->getInstr()->readsWritesVirtualRegister(Reg);        if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {          OrderBeforeUse = true; -        MoveUse = Pos; +        if (MoveUse == 0) +          MoveUse = Pos;        } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {          // Add the instruction after the scheduled instruction.          OrderAfterDef = true; @@ -3696,14 +3806,16 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,        } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {          if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {            OrderBeforeUse = true; -          MoveUse = Pos; +          if (MoveUse == 0) +            MoveUse = Pos;          } else {            OrderAfterDef = true;            MoveDef = Pos;          }        } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {          OrderBeforeUse = true; -        MoveUse = Pos; +        if (MoveUse == 0) +          MoveUse = Pos;          if (MoveUse != 0) {            OrderAfterDef = true;            MoveDef = Pos - 1; @@ -3711,49 +3823,35 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,        } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {          // Add the instruction before the scheduled instruction.          OrderBeforeUse = true; -        MoveUse = Pos; +        if (MoveUse == 0) +          MoveUse = Pos;        } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&                   isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) { -        OrderBeforeDef = true; -        MoveUse = Pos; +        if (MoveUse == 0) { +          OrderBeforeDef = true; +          MoveUse = Pos; +        }        }      }      // Check for order dependences between instructions. Make sure the source      // is ordered before the destination. 
-    for (auto &S : SU->Succs) -      if (S.getKind() == SDep::Order) { -        if (S.getSUnit() == *I && stageScheduled(*I) == StageInst1) { -          OrderBeforeUse = true; -          MoveUse = Pos; -        } -      } else if (TargetRegisterInfo::isPhysicalRegister(S.getReg())) { -        if (cycleScheduled(SU) != cycleScheduled(S.getSUnit())) { -          if (S.isAssignedRegDep()) { -            OrderAfterDef = true; -            MoveDef = Pos; -          } -        } else { -          OrderBeforeUse = true; +    for (auto &S : SU->Succs) { +      if (S.getSUnit() != *I) +        continue; +      if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) { +        OrderBeforeUse = true; +        if (Pos < MoveUse)            MoveUse = Pos; -        }        } -    for (auto &P : SU->Preds) -      if (P.getKind() == SDep::Order) { -        if (P.getSUnit() == *I && stageScheduled(*I) == StageInst1) { -          OrderAfterDef = true; -          MoveDef = Pos; -        } -      } else if (TargetRegisterInfo::isPhysicalRegister(P.getReg())) { -        if (cycleScheduled(SU) != cycleScheduled(P.getSUnit())) { -          if (P.isAssignedRegDep()) { -            OrderBeforeUse = true; -            MoveUse = Pos; -          } -        } else { -          OrderAfterDef = true; -          MoveDef = Pos; -        } +    } +    for (auto &P : SU->Preds) { +      if (P.getSUnit() != *I) +        continue; +      if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) { +        OrderAfterDef = true; +        MoveDef = Pos;        } +    }    }    // A circular dependence. @@ -3777,16 +3875,10 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,        Insts.erase(Insts.begin() + MoveDef);        Insts.erase(Insts.begin() + MoveUse);      } -    if (orderDependence(SSD, UseSU, Insts)) { -      Insts.push_front(SU); -      orderDependence(SSD, DefSU, Insts); -      return true; -    } -    Insts.pop_back(); -    Insts.push_back(SU); -    Insts.push_back(UseSU); +    orderDependence(SSD, UseSU, Insts); +    orderDependence(SSD, SU, Insts);      orderDependence(SSD, DefSU, Insts); -    return false; +    return;    }    // Put the new instruction first if there is a use in the list. Otherwise,    // put it at the end of the list. @@ -3794,14 +3886,13 @@ bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,      Insts.push_front(SU);    else      Insts.push_back(SU); -  return OrderBeforeUse;  }  /// Return true if the scheduled Phi has a loop carried operand.  bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {    if (!Phi.isPHI())      return false; -  assert(Phi.isPHI() && "Expecing a Phi."); +  assert(Phi.isPHI() && "Expecting a Phi.");    SUnit *DefSU = SSD->getSUnit(&Phi);    unsigned DefCycle = cycleScheduled(DefSU);    int DefStage = stageScheduled(DefSU); @@ -3868,6 +3959,100 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {    return true;  } +/// A property of the node order in swing-modulo-scheduling is +/// that for nodes outside circuits the following holds: +/// none of them is scheduled after both a successor and a +/// predecessor. +/// The method below checks whether the property is met. +/// If not, debug information is printed and statistics information updated. +/// Note that we do not use an assert statement. 
+/// The reason is that although an invalid node order may prevent
+/// the pipeliner from finding a pipelined schedule for arbitrary II,
+/// it does not lead to the generation of incorrect code.
+void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
+
+  // a sorted vector that maps each SUnit to its index in the NodeOrder
+  typedef std::pair<SUnit *, unsigned> UnitIndex;
+  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
+
+  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
+    Indices.push_back(std::make_pair(NodeOrder[i], i));
+
+  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
+    return std::get<0>(i1) < std::get<0>(i2);
+  };
+
+  // sort, so that we can perform a binary search
+  llvm::sort(Indices.begin(), Indices.end(), CompareKey);
+
+  bool Valid = true;
+  (void)Valid;
+  // for each SUnit in the NodeOrder, check whether
+  // it appears after both a successor and a predecessor
+  // of the SUnit. If this is the case, and the SUnit
+  // is not part of circuit, then the NodeOrder is not
+  // valid.
+  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
+    SUnit *SU = NodeOrder[i];
+    unsigned Index = i;
+
+    bool PredBefore = false;
+    bool SuccBefore = false;
+
+    SUnit *Succ;
+    SUnit *Pred;
+    (void)Succ;
+    (void)Pred;
+
+    for (SDep &PredEdge : SU->Preds) {
+      SUnit *PredSU = PredEdge.getSUnit();
+      unsigned PredIndex =
+          std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
+                                        std::make_pair(PredSU, 0), CompareKey));
+      if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
+        PredBefore = true;
+        Pred = PredSU;
+        break;
+      }
+    }
+
+    for (SDep &SuccEdge : SU->Succs) {
+      SUnit *SuccSU = SuccEdge.getSUnit();
+      unsigned SuccIndex =
+          std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
+                                        std::make_pair(SuccSU, 0), CompareKey));
+      if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
+        SuccBefore = true;
+        Succ = SuccSU;
+        break;
+      }
+    }
+
+    if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
+      // instructions in circuits are allowed to be scheduled
+      // after both a successor and predecessor.
+      bool InCircuit = std::any_of(
+          Circuits.begin(), Circuits.end(),
+          [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
+      if (InCircuit)
+        LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
+      else {
+        Valid = false;
+        NumNodeOrderIssues++;
+        LLVM_DEBUG(dbgs() << "Predecessor ";);
+      }
+      LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
+                        << " are scheduled before node " << SU->NodeNum
+                        << "\n";);
+    }
+  }
+
+  LLVM_DEBUG({
+    if (!Valid)
+      dbgs() << "Invalid node order found!\n";
+  });
+}
+
 /// Attempt to fix the degenerate cases when the instruction serialization
 /// causes the register lifetimes to overlap. For example,
 ///   p' = store_pi(p, b)
@@ -3987,27 +4172,25 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
   // generated code.
   for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
     std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
-    std::deque<SUnit *> newOrderZC;
-    // Put the zero-cost, pseudo instructions at the start of the cycle. 
+    std::deque<SUnit *> newOrderPhi;      for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {        SUnit *SU = cycleInstrs[i]; -      if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode())) -        orderDependence(SSD, SU, newOrderZC); +      if (SU->getInstr()->isPHI()) +        newOrderPhi.push_back(SU);      }      std::deque<SUnit *> newOrderI; -    // Then, add the regular instructions back.      for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {        SUnit *SU = cycleInstrs[i]; -      if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode())) +      if (!SU->getInstr()->isPHI())          orderDependence(SSD, SU, newOrderI);      }      // Replace the old order with the new order. -    cycleInstrs.swap(newOrderZC); +    cycleInstrs.swap(newOrderPhi);      cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());      SSD->fixupRegisterOverlaps(cycleInstrs);    } -  DEBUG(dump();); +  LLVM_DEBUG(dump(););  }  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp index 1e74104e89ed..2619d8f78276 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp @@ -11,6 +11,7 @@  #include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/RegionInfoImpl.h"  #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/Pass.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" @@ -89,7 +90,7 @@ bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {    RI.recalculate(F, DT, PDT, DF); -  DEBUG(RI.dump()); +  LLVM_DEBUG(RI.dump());    return false;  } diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index b82ab02a6e6c..6095bdd06b69 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -22,6 +22,7 @@  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/Attributes.h"  #include "llvm/IR/DebugLoc.h"  #include "llvm/IR/Function.h" @@ -65,23 +66,66 @@ void MachineRegisterInfo::setRegBank(unsigned Reg,    VRegInfo[Reg].first = &RegBank;  } -const TargetRegisterClass * -MachineRegisterInfo::constrainRegClass(unsigned Reg, -                                       const TargetRegisterClass *RC, -                                       unsigned MinNumRegs) { -  const TargetRegisterClass *OldRC = getRegClass(Reg); +static const TargetRegisterClass * +constrainRegClass(MachineRegisterInfo &MRI, unsigned Reg, +                  const TargetRegisterClass *OldRC, +                  const TargetRegisterClass *RC, unsigned MinNumRegs) {    if (OldRC == RC)      return RC;    const TargetRegisterClass *NewRC = -    getTargetRegisterInfo()->getCommonSubClass(OldRC, RC); +      MRI.getTargetRegisterInfo()->getCommonSubClass(OldRC, RC);    if (!NewRC || NewRC == OldRC)      return NewRC;    if (NewRC->getNumRegs() < MinNumRegs)      return nullptr; -  setRegClass(Reg, NewRC); +  MRI.setRegClass(Reg, NewRC);    return NewRC;  } +const TargetRegisterClass * +MachineRegisterInfo::constrainRegClass(unsigned Reg, +                                       const TargetRegisterClass *RC, +                                       unsigned MinNumRegs) { +  return ::constrainRegClass(*this, Reg, 
getRegClass(Reg), RC, MinNumRegs); +} + +bool +MachineRegisterInfo::constrainRegAttrs(unsigned Reg, +                                       unsigned ConstrainingReg, +                                       unsigned MinNumRegs) { +  auto const *OldRC = getRegClassOrNull(Reg); +  auto const *RC = getRegClassOrNull(ConstrainingReg); +  // A virtual register at any point must have either a low-level type +  // or a class assigned, but not both. The only exception is the internals of +  // GlobalISel's instruction selection pass, which is allowed to temporarily +  // introduce registers with types and classes both. +  assert((OldRC || getType(Reg).isValid()) && "Reg has neither class nor type"); +  assert((!OldRC || !getType(Reg).isValid()) && "Reg has class and type both"); +  assert((RC || getType(ConstrainingReg).isValid()) && +         "ConstrainingReg has neither class nor type"); +  assert((!RC || !getType(ConstrainingReg).isValid()) && +         "ConstrainingReg has class and type both"); +  if (OldRC && RC) +    return ::constrainRegClass(*this, Reg, OldRC, RC, MinNumRegs); +  // If one of the virtual registers is generic (used in generic machine +  // instructions, has a low-level type, doesn't have a class), and the other is +  // concrete (used in target specific instructions, doesn't have a low-level +  // type, has a class), we can not unify them. +  if (OldRC || RC) +    return false; +  // At this point, both registers are guaranteed to have a valid low-level +  // type, and they must agree. +  if (getType(Reg) != getType(ConstrainingReg)) +    return false; +  auto const *OldRB = getRegBankOrNull(Reg); +  auto const *RB = getRegBankOrNull(ConstrainingReg); +  if (OldRB) +    return !RB || RB == OldRB; +  if (RB) +    setRegBank(Reg, *RB); +  return true; +} +  bool  MachineRegisterInfo::recomputeRegClass(unsigned Reg) {    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -107,10 +151,11 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) {    return true;  } -unsigned MachineRegisterInfo::createIncompleteVirtualRegister() { +unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) {    unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());    VRegInfo.grow(Reg);    RegAllocHints.grow(Reg); +  insertVRegByName(Name, Reg);    return Reg;  } @@ -118,47 +163,42 @@ unsigned MachineRegisterInfo::createIncompleteVirtualRegister() {  /// function with the specified register class.  ///  unsigned -MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ +MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass, +                                           StringRef Name) {    assert(RegClass && "Cannot create register without RegClass!");    assert(RegClass->isAllocatable() &&           "Virtual register RegClass must be allocatable.");    // New virtual register number. -  unsigned Reg = createIncompleteVirtualRegister(); +  unsigned Reg = createIncompleteVirtualRegister(Name);    VRegInfo[Reg].first = RegClass;    if (TheDelegate)      TheDelegate->MRI_NoteNewVirtualRegister(Reg);    return Reg;  } -LLT MachineRegisterInfo::getType(unsigned VReg) const { -  VRegToTypeMap::const_iterator TypeIt = getVRegToType().find(VReg); -  return TypeIt != getVRegToType().end() ? TypeIt->second : LLT{}; -} -  void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {    // Check that VReg doesn't have a class.    
assert((getRegClassOrRegBank(VReg).isNull() ||           !getRegClassOrRegBank(VReg).is<const TargetRegisterClass *>()) &&           "Can't set the size of a non-generic virtual register"); -  getVRegToType()[VReg] = Ty; +  VRegToType.grow(VReg); +  VRegToType[VReg] = Ty;  }  unsigned -MachineRegisterInfo::createGenericVirtualRegister(LLT Ty) { +MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) {    // New virtual register number. -  unsigned Reg = createIncompleteVirtualRegister(); +  unsigned Reg = createIncompleteVirtualRegister(Name);    // FIXME: Should we use a dummy register class?    VRegInfo[Reg].first = static_cast<RegisterBank *>(nullptr); -  getVRegToType()[Reg] = Ty; +  setType(Reg, Ty);    if (TheDelegate)      TheDelegate->MRI_NoteNewVirtualRegister(Reg);    return Reg;  } -void MachineRegisterInfo::clearVirtRegTypes() { -  getVRegToType().clear(); -} +void MachineRegisterInfo::clearVirtRegTypes() { VRegToType.clear(); }  /// clearVirtRegs - Remove all virtual registers (after physreg assignment).  void MachineRegisterInfo::clearVirtRegs() { diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 36844e9fb30a..773661965f18 100644 --- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -204,7 +204,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {    // If the client wants to know about all new instructions, tell it.    if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); -  DEBUG(dbgs() << "  Inserted PHI: " << *InsertedPHI << "\n"); +  LLVM_DEBUG(dbgs() << "  Inserted PHI: " << *InsertedPHI << "\n");    return InsertedPHI->getOperand(0).getReg();  } diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index e15eb658a05c..502d18f08f93 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -32,7 +32,6 @@  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachinePassRegistry.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/Passes.h"  #include "llvm/CodeGen/RegisterClassInfo.h"  #include "llvm/CodeGen/RegisterPressure.h" @@ -48,6 +47,7 @@  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSchedule.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/MC/LaneBitmask.h"  #include "llvm/Pass.h"  #include "llvm/Support/CommandLine.h" @@ -55,6 +55,7 @@  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/raw_ostream.h"  #include <algorithm>  #include <cassert> @@ -271,7 +272,7 @@ priorNonDebug(MachineBasicBlock::const_iterator I,                MachineBasicBlock::const_iterator Beg) {    assert(I != Beg && "reached the top of the region, cannot decrement");    while (--I != Beg) { -    if (!I->isDebugValue()) +    if (!I->isDebugInstr())        break;    }    return I; @@ -291,7 +292,7 @@ static MachineBasicBlock::const_iterator  nextIfDebug(MachineBasicBlock::const_iterator I,              MachineBasicBlock::const_iterator End) {    for(; I != End; ++I) { -    if (!I->isDebugValue()) +    if (!I->isDebugInstr())        break;    }    return I; @@ -344,7 +345,7 @@ ScheduleDAGInstrs 
*PostMachineScheduler::createPostMachineScheduler() {  /// This design avoids exposing scheduling boundaries to the DAG builder,  /// simplifying the DAG builder's support for "special" target instructions.  /// At the same time the design allows target schedulers to operate across -/// scheduling boundaries, for example to bundle the boudary instructions +/// scheduling boundaries, for example to bundle the boundary instructions  /// without reordering them. This creates complexity, because the target  /// scheduler must update the RegionBegin and RegionEnd positions cached by  /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler @@ -360,7 +361,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {    } else if (!mf.getSubtarget().enableMachineScheduler())      return false; -  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs())); +  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));    // Initialize the context of the pass.    MF = &mf; @@ -372,7 +373,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {    LIS = &getAnalysis<LiveIntervals>();    if (VerifyScheduling) { -    DEBUG(LIS->dump()); +    LLVM_DEBUG(LIS->dump());      MF->verify(this, "Before machine scheduling.");    }    RegClassInfo->runOnMachineFunction(*MF); @@ -382,7 +383,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {    std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());    scheduleRegions(*Scheduler, false); -  DEBUG(LIS->dump()); +  LLVM_DEBUG(LIS->dump());    if (VerifyScheduling)      MF->verify(this, "After machine scheduling.");    return true; @@ -396,10 +397,10 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {      if (!EnablePostRAMachineSched)        return false;    } else if (!mf.getSubtarget().enablePostRAScheduler()) { -    DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); +    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");      return false;    } -  DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs())); +  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));    // Initialize the context of the pass.    MF = &mf; @@ -481,7 +482,7 @@ getSchedRegions(MachineBasicBlock *MBB,        MachineInstr &MI = *std::prev(I);        if (isSchedBoundary(&MI, &*MBB, MF, TII))          break; -      if (!MI.isDebugValue()) +      if (!MI.isDebugInstr())          // MBB::size() uses instr_iterator to count. Here we need a bundle to          // count as a single instruction.          
++NumRegionInstrs; @@ -547,12 +548,13 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,          Scheduler.exitRegion();          continue;        } -      DEBUG(dbgs() << "********** MI Scheduling **********\n"); -      DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) << " " -                   << MBB->getName() << "\n  From: " << *I << "    To: "; -            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; -            else dbgs() << "End"; -            dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); +      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n"); +      LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) +                        << " " << MBB->getName() << "\n  From: " << *I +                        << "    To: "; +                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; +                 else dbgs() << "End"; +                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');        if (DumpCriticalPathLength) {          errs() << MF->getName();          errs() << ":%bb. " << MBB->getNumber(); @@ -749,8 +751,8 @@ bool ScheduleDAGMI::checkSchedLimit() {  /// does not consider liveness or register pressure. It is useful for PostRA  /// scheduling and potentially other custom schedulers.  void ScheduleDAGMI::schedule() { -  DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); -  DEBUG(SchedImpl->dumpPolicy()); +  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); +  LLVM_DEBUG(SchedImpl->dumpPolicy());    // Build the DAG.    buildSchedGraph(AA); @@ -762,26 +764,22 @@ void ScheduleDAGMI::schedule() {    SmallVector<SUnit*, 8> TopRoots, BotRoots;    findRootsAndBiasEdges(TopRoots, BotRoots); +  LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this); +             for (const SUnit &SU +                  : SUnits) SU.dumpAll(this); +             if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this);); +  if (ViewMISchedDAGs) viewGraph(); +    // Initialize the strategy before modifying the DAG.    // This may initialize a DFSResult to be used for queue priority.    SchedImpl->initialize(this); -  DEBUG( -    if (EntrySU.getInstr() != nullptr) -      EntrySU.dumpAll(this); -    for (const SUnit &SU : SUnits) -      SU.dumpAll(this); -    if (ExitSU.getInstr() != nullptr) -      ExitSU.dumpAll(this); -  ); -  if (ViewMISchedDAGs) viewGraph(); -    // Initialize ready queues now that the DAG and priority data are finalized.    initQueues(TopRoots, BotRoots);    bool IsTopNode = false;    while (true) { -    DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n"); +    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");      SUnit *SU = SchedImpl->pickNode(IsTopNode);      if (!SU) break; @@ -821,7 +819,7 @@ void ScheduleDAGMI::schedule() {    placeDebugValues(); -  DEBUG({ +  LLVM_DEBUG({      dbgs() << "*** Final schedule for "             << printMBBReference(*begin()->getParent()) << " ***\n";      dumpSchedule(); @@ -1016,7 +1014,7 @@ void ScheduleDAGMILive::initRegPressure() {    // Close the RPTracker to finalize live ins.    RPTracker.closeRegion(); -  DEBUG(RPTracker.dump()); +  LLVM_DEBUG(RPTracker.dump());    // Initialize the live ins and live outs.    
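// (Illustrative aside.) The hunk below records every register-pressure set
// whose measured region pressure exceeds the target's limit, so the scheduler
// can avoid raising pressure in those sets further. A minimal, self-contained
// sketch of that bookkeeping; the set names, limits, and counts here are
// invented, and the real pass queries RegisterClassInfo instead:
#include <cstdio>
#include <vector>

struct PressureSet { const char *Name; unsigned Limit; unsigned Actual; };

int main() {
  std::vector<PressureSet> Sets = {
      {"GPR", 16, 13}, {"FPR", 32, 35}, {"CCR", 1, 2}};
  std::vector<unsigned> CriticalSets; // indices of sets over their limit
  for (unsigned i = 0, e = Sets.size(); i != e; ++i)
    if (Sets[i].Actual > Sets[i].Limit) {
      std::printf("%s Limit %u Actual %u\n", Sets[i].Name, Sets[i].Limit,
                  Sets[i].Actual);
      CriticalSets.push_back(i); // scheduling then avoids growing these sets
    }
  return 0;
}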
TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); @@ -1031,8 +1029,8 @@ void ScheduleDAGMILive::initRegPressure() {    BotRPTracker.initLiveThru(RPTracker);    if (!BotRPTracker.getLiveThru().empty()) {      TopRPTracker.initLiveThru(BotRPTracker.getLiveThru()); -    DEBUG(dbgs() << "Live Thru: "; -          dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI)); +    LLVM_DEBUG(dbgs() << "Live Thru: "; +               dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));    };    // For each live out vreg reduce the pressure change associated with other @@ -1046,15 +1044,13 @@ void ScheduleDAGMILive::initRegPressure() {      updatePressureDiffs(LiveUses);    } -  DEBUG( -    dbgs() << "Top Pressure:\n"; -    dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); -    dbgs() << "Bottom Pressure:\n"; -    dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); -  ); +  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; +             dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); +             dbgs() << "Bottom Pressure:\n"; +             dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););    assert((BotRPTracker.getPos() == RegionEnd || -          (RegionEnd->isDebugValue() && +          (RegionEnd->isDebugInstr() &&             BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&           "Can't find the region bottom"); @@ -1066,17 +1062,16 @@ void ScheduleDAGMILive::initRegPressure() {    for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {      unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);      if (RegionPressure[i] > Limit) { -      DEBUG(dbgs() << TRI->getRegPressureSetName(i) -            << " Limit " << Limit -            << " Actual " << RegionPressure[i] << "\n"); +      LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit +                        << " Actual " << RegionPressure[i] << "\n");        RegionCriticalPSets.push_back(PressureChange(i));      }    } -  DEBUG(dbgs() << "Excess PSets: "; -        for (const PressureChange &RCPS : RegionCriticalPSets) -          dbgs() << TRI->getRegPressureSetName( -            RCPS.getPSet()) << " "; -        dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Excess PSets: "; +             for (const PressureChange &RCPS +                  : RegionCriticalPSets) dbgs() +             << TRI->getRegPressureSetName(RCPS.getPSet()) << " "; +             dbgs() << "\n");  }  void ScheduleDAGMILive:: @@ -1097,10 +1092,11 @@ updateScheduledPressure(const SUnit *SU,      }      unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);      if (NewMaxPressure[ID] >= Limit - 2) { -      DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": " -            << NewMaxPressure[ID] -            << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit -            << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); +      LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": " +                        << NewMaxPressure[ID] +                        << ((NewMaxPressure[ID] > Limit) ? 
" > " : " <= ") +                        << Limit << "(+ " << BotRPTracker.getLiveThru()[ID] +                        << " livethru)\n");      }    }  } @@ -1130,17 +1126,14 @@ void ScheduleDAGMILive::updatePressureDiffs(          PressureDiff &PDiff = getPressureDiff(&SU);          PDiff.addPressureChange(Reg, Decrement, &MRI); -        DEBUG( -          dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") " -                 << printReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask) -                 << ' ' << *SU.getInstr(); -          dbgs() << "              to "; -          PDiff.dump(*TRI); -        ); +        LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") " +                          << printReg(Reg, TRI) << ':' +                          << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr(); +                   dbgs() << "              to "; PDiff.dump(*TRI););        }      } else {        assert(P.LaneMask.any()); -      DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n"); +      LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");        // This may be called before CurrentBottom has been initialized. However,        // BotRPTracker must have a valid position. We want the value live into the        // instruction or live out of the block, so ask for the previous @@ -1168,12 +1161,9 @@ void ScheduleDAGMILive::updatePressureDiffs(            if (LRQ.valueIn() == VNI) {              PressureDiff &PDiff = getPressureDiff(SU);              PDiff.addPressureChange(Reg, true, &MRI); -            DEBUG( -              dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") " -                     << *SU->getInstr(); -              dbgs() << "              to "; -              PDiff.dump(*TRI); -            ); +            LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") " +                              << *SU->getInstr(); +                       dbgs() << "              to "; PDiff.dump(*TRI););            }          }        } @@ -1192,8 +1182,8 @@ void ScheduleDAGMILive::updatePressureDiffs(  /// ScheduleDAGMILive then it will want to override this virtual method in order  /// to update any specialized state.  void ScheduleDAGMILive::schedule() { -  DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); -  DEBUG(SchedImpl->dumpPolicy()); +  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); +  LLVM_DEBUG(SchedImpl->dumpPolicy());    buildDAGWithRegPressure();    Topo.InitDAGTopologicalSorting(); @@ -1207,26 +1197,22 @@ void ScheduleDAGMILive::schedule() {    // This may initialize a DFSResult to be used for queue priority.    
SchedImpl->initialize(this); -  DEBUG( -    if (EntrySU.getInstr() != nullptr) -      EntrySU.dumpAll(this); -    for (const SUnit &SU : SUnits) { -      SU.dumpAll(this); -      if (ShouldTrackPressure) { -        dbgs() << "  Pressure Diff      : "; -        getPressureDiff(&SU).dump(*TRI); -      } -      dbgs() << "  Single Issue       : "; -      if (SchedModel.mustBeginGroup(SU.getInstr()) && -         SchedModel.mustEndGroup(SU.getInstr())) -        dbgs() << "true;"; -      else -        dbgs() << "false;"; -      dbgs() << '\n'; -    } -    if (ExitSU.getInstr() != nullptr) -      ExitSU.dumpAll(this); -  ); +  LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this); +             for (const SUnit &SU +                  : SUnits) { +               SU.dumpAll(this); +               if (ShouldTrackPressure) { +                 dbgs() << "  Pressure Diff      : "; +                 getPressureDiff(&SU).dump(*TRI); +               } +               dbgs() << "  Single Issue       : "; +               if (SchedModel.mustBeginGroup(SU.getInstr()) && +                   SchedModel.mustEndGroup(SU.getInstr())) +                 dbgs() << "true;"; +               else +                 dbgs() << "false;"; +               dbgs() << '\n'; +             } if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););    if (ViewMISchedDAGs) viewGraph();    // Initialize ready queues now that the DAG and priority data are finalized. @@ -1234,7 +1220,7 @@ void ScheduleDAGMILive::schedule() {    bool IsTopNode = false;    while (true) { -    DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n"); +    LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");      SUnit *SU = SchedImpl->pickNode(IsTopNode);      if (!SU) break; @@ -1262,7 +1248,7 @@ void ScheduleDAGMILive::schedule() {    placeDebugValues(); -  DEBUG({ +  LLVM_DEBUG({      dbgs() << "*** Final schedule for "             << printMBBReference(*begin()->getParent()) << " ***\n";      dumpSchedule(); @@ -1379,13 +1365,13 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {        } else          CyclicLatency = 0; -      DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" -            << SU->NodeNum << ") = " << CyclicLatency << "c\n"); +      LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" +                        << SU->NodeNum << ") = " << CyclicLatency << "c\n");        if (CyclicLatency > MaxCyclicLatency)          MaxCyclicLatency = CyclicLatency;      }    } -  DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n"); +  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");    return MaxCyclicLatency;  } @@ -1429,10 +1415,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {        TopRPTracker.advance(RegOpers);        assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); -      DEBUG( -        dbgs() << "Top Pressure:\n"; -        dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); -      ); +      LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure( +                     TopRPTracker.getRegSetPressureAtPos(), TRI););        updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);      } @@ -1449,6 +1433,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {        }        moveInstruction(MI, CurrentBottom);        CurrentBottom = MI; +      BotRPTracker.setPos(CurrentBottom);      }      if (ShouldTrackPressure) {        
RegisterOperands RegOpers; @@ -1467,10 +1452,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {        SmallVector<RegisterMaskPair, 8> LiveUses;        BotRPTracker.recede(RegOpers, &LiveUses);        assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); -      DEBUG( -        dbgs() << "Bottom Pressure:\n"; -        dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); -      ); +      LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure( +                     BotRPTracker.getRegSetPressureAtPos(), TRI););        updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);        updatePressureDiffs(LiveUses); @@ -1484,7 +1467,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {  namespace { -/// \brief Post-process the DAG to create cluster edges between neighboring +/// Post-process the DAG to create cluster edges between neighboring  /// loads or between neighboring stores.  class BaseMemOpClusterMutation : public ScheduleDAGMutation {    struct MemOpInfo { @@ -1561,7 +1544,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(    if (MemOpRecords.size() < 2)      return; -  std::sort(MemOpRecords.begin(), MemOpRecords.end()); +  llvm::sort(MemOpRecords.begin(), MemOpRecords.end());    unsigned ClusterLength = 1;    for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {      SUnit *SUa = MemOpRecords[Idx].SU; @@ -1570,8 +1553,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(                                   *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,                                   ClusterLength) &&          DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { -      DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" -            << SUb->NodeNum << ")\n"); +      LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU(" +                        << SUb->NodeNum << ")\n");        // Copy successor edges from SUa to SUb. Interleaving computation        // dependent on SUa can prevent load combining due to register reuse.        // Predecessor edges do not need to be copied from SUb to SUa since nearby @@ -1579,7 +1562,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(        for (const SDep &Succ : SUa->Succs) {          if (Succ.getSUnit() == SUb)            continue; -        DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n"); +        LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum +                          << ")\n");          DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));        }        ++ClusterLength; @@ -1588,7 +1572,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(    }  } -/// \brief Callback from DAG postProcessing to create cluster edges for loads. +/// Callback from DAG postProcessing to create cluster edges for loads.  void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); @@ -1629,7 +1613,7 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {  namespace { -/// \brief Post-process the DAG to create weak edges from all uses of a copy to +/// Post-process the DAG to create weak edges from all uses of a copy to  /// the one use that defines the copy's source vreg, most likely an induction  /// variable increment.  
class CopyConstrain : public ScheduleDAGMutation { @@ -1724,7 +1708,7 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {    // If GlobalSegment is killed at the LocalLI->start, the call to find()    // returned the next global segment. But if GlobalSegment overlaps with -  // LocalLI->start, then advance to the next segement. If a hole in GlobalLI +  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI    // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.    if (GlobalSegment->contains(LocalLI->beginIndex()))      ++GlobalSegment; @@ -1788,23 +1772,23 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {        return;      GlobalUses.push_back(Pred.getSUnit());    } -  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); +  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");    // Add the weak edges.    for (SmallVectorImpl<SUnit*>::const_iterator           I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) { -    DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU(" -          << GlobalSU->NodeNum << ")\n"); +    LLVM_DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU(" +                      << GlobalSU->NodeNum << ")\n");      DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));    }    for (SmallVectorImpl<SUnit*>::const_iterator           I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) { -    DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU(" -          << FirstLocalSU->NodeNum << ")\n"); +    LLVM_DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU(" +                      << FirstLocalSU->NodeNum << ")\n");      DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));    }  } -/// \brief Callback from DAG postProcessing to create weak edges to encourage +/// Callback from DAG postProcessing to create weak edges to encourage  /// copy elimination.  void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {    ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); @@ -1941,7 +1925,7 @@ getNextResourceCycle(unsigned PIdx, unsigned Cycles) {  /// The scheduler supports two modes of hazard recognition. The first is the  /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that  /// supports highly complicated in-order reservation tables -/// (ScoreboardHazardRecognizer) and arbitraty target-specific logic. +/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.  ///  /// The second is a streamlined mechanism that checks for hazards based on  /// simple counters that the scheduler itself maintains. It explicitly checks @@ -1957,16 +1941,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) {    unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());    if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { -    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops=" -          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); +    LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops=" +                      << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');      return true;    }    if (CurrMOps > 0 &&        ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||         (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) { -    DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must " -                 << (isTop()? 
"begin" : "end") << " group\n"); +    LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must " +                      << (isTop() ? "begin" : "end") << " group\n");      return true;    } @@ -1982,9 +1966,9 @@ bool SchedBoundary::checkHazard(SUnit *SU) {  #ifndef NDEBUG          MaxObservedStall = std::max(Cycles, MaxObservedStall);  #endif -        DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") " -              << SchedModel->getResourceName(ResIdx) -              << "=" << NRCycle << "c\n"); +        LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") " +                          << SchedModel->getResourceName(ResIdx) << "=" +                          << NRCycle << "c\n");          return true;        }      } @@ -2005,8 +1989,8 @@ findMaxLatency(ArrayRef<SUnit*> ReadySUs) {      }    }    if (LateSU) { -    DEBUG(dbgs() << Available.getName() << " RemLatency SU(" -          << LateSU->NodeNum << ") " << RemLatency << "c\n"); +    LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU(" +                      << LateSU->NodeNum << ") " << RemLatency << "c\n");    }    return RemLatency;  } @@ -2022,8 +2006,8 @@ getOtherResourceCount(unsigned &OtherCritIdx) {    unsigned OtherCritCount = Rem->RemIssueCount      + (RetiredMOps * SchedModel->getMicroOpFactor()); -  DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: " -        << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); +  LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: " +                    << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');    for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();         PIdx != PEnd; ++PIdx) {      unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; @@ -2033,9 +2017,10 @@ getOtherResourceCount(unsigned &OtherCritIdx) {      }    }    if (OtherCritIdx) { -    DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: " -          << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) -          << " " << SchedModel->getResourceName(OtherCritIdx) << "\n"); +    LLVM_DEBUG( +        dbgs() << "  " << Available.getName() << " + Remain CritRes: " +               << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) +               << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");    }    return OtherCritCount;  } @@ -2099,7 +2084,8 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) {        checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),                           getScheduledLatency()); -  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() +                    << '\n');  }  void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { @@ -2119,8 +2105,8 @@ unsigned SchedBoundary::  countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {    unsigned Factor = SchedModel->getResourceFactor(PIdx);    unsigned Count = Factor * Cycles; -  DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) -        << " +" << Cycles << "x" << Factor << "u\n"); +  LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +" +                    << Cycles << "x" << Factor << "u\n");    // Update Executed resources counts.    incExecutedResources(PIdx, Count); @@ -2131,16 +2117,17 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {    // becomes the critical resource.    
if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {      ZoneCritResIdx = PIdx; -    DEBUG(dbgs() << "  *** Critical resource " -          << SchedModel->getResourceName(PIdx) << ": " -          << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); +    LLVM_DEBUG(dbgs() << "  *** Critical resource " +                      << SchedModel->getResourceName(PIdx) << ": " +                      << getResourceCount(PIdx) / SchedModel->getLatencyFactor() +                      << "c\n");    }    // For reserved resources, record the highest cycle using the resource.    unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);    if (NextAvailable > CurrCycle) { -    DEBUG(dbgs() << "  Resource conflict: " -          << SchedModel->getProcResource(PIdx)->Name << " reserved until @" -          << NextAvailable << "\n"); +    LLVM_DEBUG(dbgs() << "  Resource conflict: " +                      << SchedModel->getProcResource(PIdx)->Name +                      << " reserved until @" << NextAvailable << "\n");    }    return NextAvailable;  } @@ -2165,7 +2152,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {        "Cannot schedule this instruction's MicroOps in the current cycle.");    unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); -  DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n"); +  LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");    unsigned NextCycle = CurrCycle;    switch (SchedModel->getMicroOpBufferSize()) { @@ -2175,7 +2162,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {    case 1:      if (ReadyCycle > NextCycle) {        NextCycle = ReadyCycle; -      DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n"); +      LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");      }      break;    default: @@ -2204,8 +2191,9 @@ void SchedBoundary::bumpNode(SUnit *SU) {        if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))            >= (int)SchedModel->getLatencyFactor()) {          ZoneCritResIdx = 0; -        DEBUG(dbgs() << "  *** Critical resource NumMicroOps: " -              << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); +        LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: " +                          << ScaledMOps / SchedModel->getLatencyFactor() +                          << "c\n");        }      }      for (TargetSchedModel::ProcResIter @@ -2241,13 +2229,13 @@ void SchedBoundary::bumpNode(SUnit *SU) {    unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;    if (SU->getDepth() > TopLatency) {      TopLatency = SU->getDepth(); -    DEBUG(dbgs() << "  " << Available.getName() -          << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); +    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU(" +                      << SU->NodeNum << ") " << TopLatency << "c\n");    }    if (SU->getHeight() > BotLatency) {      BotLatency = SU->getHeight(); -    DEBUG(dbgs() << "  " << Available.getName() -          << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); +    LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU(" +                      << SU->NodeNum << ") " << BotLatency << "c\n");    }    // If we stall for any reason, bump the cycle.    if (NextCycle > CurrCycle) @@ -2271,17 +2259,17 @@ void SchedBoundary::bumpNode(SUnit *SU) {    // currCycle to X.    
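// (Illustrative aside.) The group handling below retires an instruction's
// micro-ops into the current cycle and advances the cycle once the issue
// width is exhausted, or a must-begin/must-end issue group forces it. A
// minimal standalone model of just the width counter; the issue width and
// uop counts are invented:
#include <cstdio>

int main() {
  const unsigned IssueWidth = 4; // micro-ops issuable per cycle
  unsigned CurrCycle = 0, CurrMOps = 0;
  auto bumpNode = [&](unsigned UOps) {
    CurrMOps += UOps;
    while (CurrMOps >= IssueWidth) { // width exhausted: move to a new cycle
      ++CurrCycle;
      CurrMOps -= IssueWidth;
    }
    std::printf("cycle=%u pending uops=%u\n", CurrCycle, CurrMOps);
  };
  bumpNode(3); // cycle=0, 3 uops pending
  bumpNode(2); // 5 >= 4: bump to cycle=1, 1 pending
  bumpNode(4); // 5 >= 4: bump to cycle=2, 1 pending
}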
if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||        (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) { -    DEBUG(dbgs() << "  Bump cycle to " -                 << (isTop() ? "end" : "begin") << " group\n"); +    LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin") +                      << " group\n");      bumpCycle(++NextCycle);    }    while (CurrMOps >= SchedModel->getIssueWidth()) { -    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps -          << " at cycle " << CurrCycle << '\n'); +    LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle " +                      << CurrCycle << '\n');      bumpCycle(++NextCycle);    } -  DEBUG(dumpScheduledState()); +  LLVM_DEBUG(dumpScheduledState());  }  /// Release pending ready nodes in to the available queue. This makes them @@ -2354,8 +2342,8 @@ SUnit *SchedBoundary::pickOnlyChoice() {      releasePending();    } -  DEBUG(Pending.dump()); -  DEBUG(Available.dump()); +  LLVM_DEBUG(Pending.dump()); +  LLVM_DEBUG(Available.dump());    if (Available.size() == 1)      return *Available.begin(); @@ -2453,27 +2441,24 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,    if (!OtherResLimited) {      if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {        Policy.ReduceLatency |= true; -      DEBUG(dbgs() << "  " << CurrZone.Available.getName() -            << " RemainingLatency " << RemLatency << " + " -            << CurrZone.getCurrCycle() << "c > CritPath " -            << Rem.CriticalPath << "\n"); +      LLVM_DEBUG(dbgs() << "  " << CurrZone.Available.getName() +                        << " RemainingLatency " << RemLatency << " + " +                        << CurrZone.getCurrCycle() << "c > CritPath " +                        << Rem.CriticalPath << "\n");      }    }    // If the same resource is limiting inside and outside the zone, do nothing.    if (CurrZone.getZoneCritResIdx() == OtherCritIdx)      return; -  DEBUG( -    if (CurrZone.isResourceLimited()) { -      dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: " -             << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) -             << "\n"; -    } -    if (OtherResLimited) -      dbgs() << "  RemainingLimit: " -             << SchedModel->getResourceName(OtherCritIdx) << "\n"; -    if (!CurrZone.isResourceLimited() && !OtherResLimited) -      dbgs() << "  Latency limited both directions.\n"); +  LLVM_DEBUG(if (CurrZone.isResourceLimited()) { +    dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: " +           << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n"; +  } if (OtherResLimited) dbgs() +                 << "  RemainingLimit: " +                 << SchedModel->getResourceName(OtherCritIdx) << "\n"; +             if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs() +             << "  Latency limited both directions.\n");    if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)      Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); @@ -2560,11 +2545,12 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {  }  #endif +namespace llvm {  /// Return true if this heuristic determines order. 
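// (Illustrative aside.) tryLess/tryGreater, diffed below, give each heuristic
// three outcomes: the new candidate wins, the incumbent explicitly wins, or
// the comparison ties and the next heuristic in the chain decides. A
// standalone sketch of that chain; the fields and reasons are invented:
#include <cstdio>

enum Reason { NoCand, RegExcess, Latency }; // lower value = stronger reason

struct Candidate { int Excess; int Depth; Reason Why; };

// Returns true once this heuristic has determined the order.
static bool tryLess(int TryVal, int CandVal, Candidate &TryCand,
                    Candidate &Cand, Reason Why) {
  if (TryVal < CandVal) {
    TryCand.Why = Why; // the new candidate wins for this reason
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Why > Why)
      Cand.Why = Why;  // the incumbent wins; keep its strongest reason
    return true;
  }
  return false;        // tie: fall through to the next heuristic
}

int main() {
  Candidate Cand{2, 7, NoCand}, TryCand{2, 5, NoCand};
  // Excess ties, so the depth heuristic decides and picks TryCand (Latency).
  if (tryLess(TryCand.Excess, Cand.Excess, TryCand, Cand, RegExcess) ||
      tryLess(TryCand.Depth, Cand.Depth, TryCand, Cand, Latency))
    std::printf("order decided; TryCand.Why=%d Cand.Why=%d\n", TryCand.Why,
                Cand.Why);
}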
-static bool tryLess(int TryVal, int CandVal, -                    GenericSchedulerBase::SchedCandidate &TryCand, -                    GenericSchedulerBase::SchedCandidate &Cand, -                    GenericSchedulerBase::CandReason Reason) { +bool tryLess(int TryVal, int CandVal, +             GenericSchedulerBase::SchedCandidate &TryCand, +             GenericSchedulerBase::SchedCandidate &Cand, +             GenericSchedulerBase::CandReason Reason) {    if (TryVal < CandVal) {      TryCand.Reason = Reason;      return true; @@ -2577,10 +2563,10 @@ static bool tryLess(int TryVal, int CandVal,    return false;  } -static bool tryGreater(int TryVal, int CandVal, -                       GenericSchedulerBase::SchedCandidate &TryCand, -                       GenericSchedulerBase::SchedCandidate &Cand, -                       GenericSchedulerBase::CandReason Reason) { +bool tryGreater(int TryVal, int CandVal, +                GenericSchedulerBase::SchedCandidate &TryCand, +                GenericSchedulerBase::SchedCandidate &Cand, +                GenericSchedulerBase::CandReason Reason) {    if (TryVal > CandVal) {      TryCand.Reason = Reason;      return true; @@ -2593,9 +2579,9 @@ static bool tryGreater(int TryVal, int CandVal,    return false;  } -static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, -                       GenericSchedulerBase::SchedCandidate &Cand, -                       SchedBoundary &Zone) { +bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, +                GenericSchedulerBase::SchedCandidate &Cand, +                SchedBoundary &Zone) {    if (Zone.isTop()) {      if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), @@ -2617,10 +2603,11 @@ static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,    }    return false;  } +} // end namespace llvm  static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) { -  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") -        << GenericSchedulerBase::getReasonStr(Reason) << '\n'); +  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? 
"Top " : "Bot ") +                    << GenericSchedulerBase::getReasonStr(Reason) << '\n');  }  static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) { @@ -2742,14 +2729,14 @@ void GenericScheduler::checkAcyclicLatency() {    Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; -  DEBUG(dbgs() << "IssueCycles=" -        << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " -        << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() -        << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount -        << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() -        << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; -        if (Rem.IsAcyclicLatencyLimited) -          dbgs() << "  ACYCLIC LATENCY LIMIT\n"); +  LLVM_DEBUG( +      dbgs() << "IssueCycles=" +             << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " +             << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() +             << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount +             << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() +             << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; +      if (Rem.IsAcyclicLatencyLimited) dbgs() << "  ACYCLIC LATENCY LIMIT\n");  }  void GenericScheduler::registerRoots() { @@ -2760,7 +2747,7 @@ void GenericScheduler::registerRoots() {      if (SU->getDepth() > Rem.CriticalPath)        Rem.CriticalPath = SU->getDepth();    } -  DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); +  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');    if (DumpCriticalPathLength) {      errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";    } @@ -2771,13 +2758,14 @@ void GenericScheduler::registerRoots() {    }  } -static bool tryPressure(const PressureChange &TryP, -                        const PressureChange &CandP, -                        GenericSchedulerBase::SchedCandidate &TryCand, -                        GenericSchedulerBase::SchedCandidate &Cand, -                        GenericSchedulerBase::CandReason Reason, -                        const TargetRegisterInfo *TRI, -                        const MachineFunction &MF) { +namespace llvm { +bool tryPressure(const PressureChange &TryP, +                 const PressureChange &CandP, +                 GenericSchedulerBase::SchedCandidate &TryCand, +                 GenericSchedulerBase::SchedCandidate &Cand, +                 GenericSchedulerBase::CandReason Reason, +                 const TargetRegisterInfo *TRI, +                 const MachineFunction &MF) {    // If one candidate decreases and the other increases, go with it.    // Invalid candidates have UnitInc==0.    if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, @@ -2810,7 +2798,7 @@ static bool tryPressure(const PressureChange &TryP,    return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);  } -static unsigned getWeakLeft(const SUnit *SU, bool isTop) { +unsigned getWeakLeft(const SUnit *SU, bool isTop) {    return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;  } @@ -2821,7 +2809,7 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) {  /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled  /// with the operation that produces or consumes the physreg. We'll do this when  /// regalloc has support for parallel copies. 
-static int biasPhysRegCopy(const SUnit *SU, bool isTop) { +int biasPhysRegCopy(const SUnit *SU, bool isTop) {    const MachineInstr *MI = SU->getInstr();    if (!MI->isCopy())      return 0; @@ -2841,6 +2829,7 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) {      return AtBoundary ? -1 : 1;    return 0;  } +} // end namespace llvm  void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,                                       bool AtTop, @@ -2873,13 +2862,13 @@ void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,        }      }    } -  DEBUG(if (Cand.RPDelta.Excess.isValid()) -          dbgs() << "  Try  SU(" << Cand.SU->NodeNum << ") " -                 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) -                 << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n"); +  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs() +             << "  Try  SU(" << Cand.SU->NodeNum << ") " +             << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":" +             << Cand.RPDelta.Excess.getUnitInc() << "\n");  } -/// Apply a set of heursitics to a new candidate. Heuristics are currently +/// Apply a set of heuristics to a new candidate. Heuristics are currently  /// hierarchical. This may be more efficient than a graduated cost model because  /// we don't need to evaluate all aspects of the model for each node in the  /// queue. But it's really done to make the heuristics easier to debug and @@ -2891,7 +2880,7 @@ void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,  //              if Cand is from a different zone than TryCand.  void GenericScheduler::tryCandidate(SchedCandidate &Cand,                                      SchedCandidate &TryCand, -                                    SchedBoundary *Zone) { +                                    SchedBoundary *Zone) const {    // Initialize the candidate if needed.    if (!Cand.isValid()) {      TryCand.Reason = NodeOrder; @@ -3017,7 +3006,7 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,        if (TryCand.ResDelta == SchedResourceDelta())          TryCand.initResourceDelta(DAG, SchedModel);        Cand.setBest(TryCand); -      DEBUG(traceCandidate(Cand)); +      LLVM_DEBUG(traceCandidate(Cand));      }    }  } @@ -3046,14 +3035,14 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {    setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);    // See if BotCand is still valid (because we previously scheduled from Top). -  DEBUG(dbgs() << "Picking from Bot:\n"); +  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");    if (!BotCand.isValid() || BotCand.SU->isScheduled ||        BotCand.Policy != BotPolicy) {      BotCand.reset(CandPolicy());      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);      assert(BotCand.Reason != NoCand && "failed to find the first candidate");    } else { -    DEBUG(traceCandidate(BotCand)); +    LLVM_DEBUG(traceCandidate(BotCand));  #ifndef NDEBUG      if (VerifyScheduling) {        SchedCandidate TCand; @@ -3066,14 +3055,14 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {    }    // Check if the top Q has a better candidate. 
-  DEBUG(dbgs() << "Picking from Top:\n"); +  LLVM_DEBUG(dbgs() << "Picking from Top:\n");    if (!TopCand.isValid() || TopCand.SU->isScheduled ||        TopCand.Policy != TopPolicy) {      TopCand.reset(CandPolicy());      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);      assert(TopCand.Reason != NoCand && "failed to find the first candidate");    } else { -    DEBUG(traceCandidate(TopCand)); +    LLVM_DEBUG(traceCandidate(TopCand));  #ifndef NDEBUG      if (VerifyScheduling) {        SchedCandidate TCand; @@ -3093,7 +3082,7 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {    tryCandidate(Cand, TopCand, nullptr);    if (TopCand.Reason != NoCand) {      Cand.setBest(TopCand); -    DEBUG(traceCandidate(Cand)); +    LLVM_DEBUG(traceCandidate(Cand));    }    IsTopNode = Cand.AtTop; @@ -3142,7 +3131,8 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {    if (SU->isBottomReady())      Bot.removeReady(SU); -  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); +  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " +                    << *SU->getInstr());    return SU;  } @@ -3163,8 +3153,8 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {      MachineInstr *Copy = DepSU->getInstr();      if (!Copy->isCopy())        continue; -    DEBUG(dbgs() << "  Rescheduling physreg copy "; -          Dep.getSUnit()->dump(DAG)); +    LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy "; +               Dep.getSUnit()->dump(DAG));      DAG->moveInstruction(Copy, InsertPos);    }  } @@ -3243,13 +3233,13 @@ void PostGenericScheduler::registerRoots() {      if (SU->getDepth() > Rem.CriticalPath)        Rem.CriticalPath = SU->getDepth();    } -  DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); +  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');    if (DumpCriticalPathLength) {      errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";    }  } -/// Apply a set of heursitics to a new candidate for PostRA scheduling. +/// Apply a set of heuristics to a new candidate for PostRA scheduling.  ///  /// \param Cand provides the policy and current best candidate.  /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. @@ -3301,7 +3291,7 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {      tryCandidate(Cand, TryCand);      if (TryCand.Reason != NoCand) {        Cand.setBest(TryCand); -      DEBUG(traceCandidate(Cand)); +      LLVM_DEBUG(traceCandidate(Cand));      }    }  } @@ -3333,7 +3323,8 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {    IsTopNode = true;    Top.removeReady(SU); -  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); +  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " +                    << *SU->getInstr());    return SU;  } @@ -3355,7 +3346,7 @@ ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {  namespace { -/// \brief Order nodes by the ILP metric. +/// Order nodes by the ILP metric.  struct ILPOrder {    const SchedDFSResult *DFSResult = nullptr;    const BitVector *ScheduledTrees = nullptr; @@ -3363,7 +3354,7 @@ struct ILPOrder {    ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {} -  /// \brief Apply a less-than relation on node priority. +  /// Apply a less-than relation on node priority.    ///    /// (Return true if A comes after B in the Q.)    
bool operator()(const SUnit *A, const SUnit *B) const { @@ -3388,7 +3379,7 @@ struct ILPOrder {    }  }; -/// \brief Schedule based on the ILP metric. +/// Schedule based on the ILP metric.  class ILPScheduler : public MachineSchedStrategy {    ScheduleDAGMILive *DAG = nullptr;    ILPOrder Cmp; @@ -3422,16 +3413,19 @@ public:      SUnit *SU = ReadyQ.back();      ReadyQ.pop_back();      IsTopNode = false; -    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") " -          << " ILP: " << DAG->getDFSResult()->getILP(SU) -          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" -          << DAG->getDFSResult()->getSubtreeLevel( -            DAG->getDFSResult()->getSubtreeID(SU)) << '\n' -          << "Scheduling " << *SU->getInstr()); +    LLVM_DEBUG(dbgs() << "Pick node " +                      << "SU(" << SU->NodeNum << ") " +                      << " ILP: " << DAG->getDFSResult()->getILP(SU) +                      << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) +                      << " @" +                      << DAG->getDFSResult()->getSubtreeLevel( +                             DAG->getDFSResult()->getSubtreeID(SU)) +                      << '\n' +                      << "Scheduling " << *SU->getInstr());      return SU;    } -  /// \brief Scheduler callback to notify that a new subtree is scheduled. +  /// Scheduler callback to notify that a new subtree is scheduled.    void scheduleTree(unsigned SubtreeID) override {      std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);    } diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index bedfdd84b1ca..354f46e9e625 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -77,6 +77,7 @@ static cl::opt<unsigned> SplitEdgeProbabilityThreshold(  STATISTIC(NumSunk,      "Number of machine instructions sunk");  STATISTIC(NumSplit,     "Number of critical edges split");  STATISTIC(NumCoalesces, "Number of copies coalesced"); +STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");  namespace { @@ -138,7 +139,7 @@ namespace {                                       MachineBasicBlock *From,                                       MachineBasicBlock *To); -    /// \brief Postpone the splitting of the given critical +    /// Postpone the splitting of the given critical      /// edge (\p From, \p To).      ///      /// We do not split the edges on the fly. 
Indeed, this invalidates @@ -210,8 +211,8 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,    MachineInstr *DefMI = MRI->getVRegDef(SrcReg);    if (DefMI->isCopyLike())      return false; -  DEBUG(dbgs() << "Coalescing: " << *DefMI); -  DEBUG(dbgs() << "*** to: " << MI); +  LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI); +  LLVM_DEBUG(dbgs() << "*** to: " << MI);    MRI->replaceRegWith(DstReg, SrcReg);    MI.eraseFromParent(); @@ -295,7 +296,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {    if (skipFunction(MF.getFunction()))      return false; -  DEBUG(dbgs() << "******** Machine Sinking ********\n"); +  LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");    TII = MF.getSubtarget().getInstrInfo();    TRI = MF.getSubtarget().getRegisterInfo(); @@ -322,14 +323,14 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {      for (auto &Pair : ToSplit) {        auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);        if (NewSucc != nullptr) { -        DEBUG(dbgs() << " *** Splitting critical edge: " -                     << printMBBReference(*Pair.first) << " -- " -                     << printMBBReference(*NewSucc) << " -- " -                     << printMBBReference(*Pair.second) << '\n'); +        LLVM_DEBUG(dbgs() << " *** Splitting critical edge: " +                          << printMBBReference(*Pair.first) << " -- " +                          << printMBBReference(*NewSucc) << " -- " +                          << printMBBReference(*Pair.second) << '\n');          MadeChange = true;          ++NumSplit;        } else -        DEBUG(dbgs() << " *** Not legal to break critical edge\n"); +        LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");      }      // If this iteration over the code changed anything, keep iterating.      if (!MadeChange) break; @@ -371,7 +372,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {      if (!ProcessedBegin)        --I; -    if (MI.isDebugValue()) +    if (MI.isDebugInstr())        continue;      bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); @@ -708,7 +709,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,    return SuccToSinkTo;  } -/// \brief Return true if MI is likely to be usable as a memory operation by the +/// Return true if MI is likely to be usable as a memory operation by the  /// implicit null check optimization.  ///  /// This is a "best effort" heuristic, and should not be relied upon for @@ -752,6 +753,37 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,           MBP.LHS.getReg() == BaseReg;  } +/// Sink an instruction and its associated debug instructions. +static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, +                        MachineBasicBlock::iterator InsertPos) { +  // Collect matching debug values. +  SmallVector<MachineInstr *, 2> DbgValuesToSink; +  collectDebugValues(MI, DbgValuesToSink); + +  // If we cannot find a location to use (merge with), then we erase the debug +  // location to prevent debug-info driven tools from potentially reporting +  // wrong location information. +  if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end()) +    MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(), +                                                 InsertPos->getDebugLoc())); +  else +    MI.setDebugLoc(DebugLoc()); + +  // Move the instruction. 
+  MachineBasicBlock *ParentBlock = MI.getParent(); +  SuccToSinkTo.splice(InsertPos, ParentBlock, MI, +                      ++MachineBasicBlock::iterator(MI)); + +  // Move previously adjacent debug value instructions to the insert position. +  for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), +                                                 DBE = DbgValuesToSink.end(); +       DBI != DBE; ++DBI) { +    MachineInstr *DbgMI = *DBI; +    SuccToSinkTo.splice(InsertPos, ParentBlock, DbgMI, +                        ++MachineBasicBlock::iterator(DbgMI)); +  } +} +  /// SinkInstruction - Determine whether it is safe to sink the specified machine  /// instruction out of its current block into a successor.  bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, @@ -803,7 +835,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,        return false;    } -  DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo); +  LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);    // If the block has multiple predecessors, this is a critical edge.    // Decide if we can sink along it or need to break the edge. @@ -813,26 +845,26 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,      bool TryBreak = false;      bool store = true;      if (!MI.isSafeToMove(AA, store)) { -      DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); +      LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");        TryBreak = true;      }      // We don't want to sink across a critical edge if we don't dominate the      // successor. We could be introducing calculations to new code paths.      if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) { -      DEBUG(dbgs() << " *** NOTE: Critical edge found\n"); +      LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");        TryBreak = true;      }      // Don't sink instructions into a loop.      if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { -      DEBUG(dbgs() << " *** NOTE: Loop header found\n"); +      LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n");        TryBreak = true;      }      // Otherwise we are OK with sinking along a critical edge.      if (!TryBreak) -      DEBUG(dbgs() << "Sinking along critical edge.\n"); +      LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");      else {        // Mark this edge as to be split.        // If the edge can actually be split, the next iteration of the main loop @@ -840,8 +872,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,        bool Status =          PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);        if (!Status) -        DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " -              "break critical edge\n"); +        LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " +                             "break critical edge\n");        // The instruction will not be sunk this time.        
return false;
    }
@@ -854,8 +886,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
     bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
                                             SuccToSinkTo, BreakPHIEdge);
     if (!Status)
-      DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
-            "break critical edge\n");
+      LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+                           "break critical edge\n");
     // The instruction will not be sunk this time.
     return false;
   }
@@ -865,30 +897,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
   while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
     ++InsertPos;
-  // collect matching debug values.
-  SmallVector<MachineInstr *, 2> DbgValuesToSink;
-  collectDebugValues(MI, DbgValuesToSink);
-
-  // Merge or erase debug location to ensure consistent stepping in profilers
-  // and debuggers.
-  if (!SuccToSinkTo->empty() && InsertPos != SuccToSinkTo->end())
-    MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
-                                                 InsertPos->getDebugLoc()));
-  else
-    MI.setDebugLoc(DebugLoc());
-
-
-  // Move the instruction.
-  SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
-                       ++MachineBasicBlock::iterator(MI));
-
-  // Move previously adjacent debug value instructions to the insert position.
-  for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
-         DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
-    MachineInstr *DbgMI = *DBI;
-    SuccToSinkTo->splice(InsertPos, ParentBlock,  DbgMI,
-                         ++MachineBasicBlock::iterator(DbgMI));
-  }
+  performSink(MI, *SuccToSinkTo, InsertPos);
   // Conservatively, clear any kill flags, since it's possible that they are no
   // longer correct.
@@ -902,3 +911,282 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
   return true;
 }
+
+//===----------------------------------------------------------------------===//
+// This pass is not intended to be a replacement or a complete alternative
+// for the pre-ra machine sink pass. It is only designed to sink COPY
+// instructions which should be handled after RA.
+//
+// This pass sinks COPY instructions into a successor block, if the COPY is not
+// used in the current block and the COPY is live-in to a single successor
+// (i.e., doesn't require the COPY to be duplicated).  This avoids executing the
+// copy on paths where its result isn't needed.  This also exposes
+// additional opportunities for dead copy elimination and shrink wrapping.
+//
+// These copies were either not handled by or are inserted after the MachineSink
+// pass. As an example of the former case, the MachineSink pass cannot sink
+// COPY instructions with allocatable source registers; for AArch64 these types
+// of copy instructions are frequently used to move function parameters (PhyReg)
+// into virtual registers in the entry block.
+//
+// For the machine IR below, this pass will sink %w19 in the entry into its
+// successor (%bb.1) because %w19 is only live-in in %bb.1.
+// %bb.0:
+//   %wzr = SUBSWri %w1, 1
+//   %w19 = COPY %w0
+//   Bcc 11, %bb.2
+// %bb.1:
+//   Live Ins: %w19
+//   BL @fun
+//   %w0 = ADDWrr %w0, %w19
+//   RET %w0
+// %bb.2:
+//   %w0 = COPY %wzr
+//   RET %w0
+// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
+// able to see %bb.0 as a candidate.
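A note on the mechanism before the implementation (an editorial sketch, not part of the patch): the central query the new pass keeps asking — is this register, or anything aliasing it, live-in to a given successor? — reduces to a single LiveRegUnits idiom, which the aliasWithRegsInLiveIn helper below packages for the pass. The standalone helper name isLiveIntoBlock here is hypothetical; the calls are the same LiveRegUnits interface the patch itself relies on.

    #include "llvm/CodeGen/LiveRegUnits.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"
    using namespace llvm;

    // True if Reg, or any register aliasing it, is live-in to MBB.
    // addLiveIns() marks every register unit of every live-in, so one
    // available() query covers the whole alias set of Reg.
    static bool isLiveIntoBlock(const MachineBasicBlock &MBB, unsigned Reg,
                                const TargetRegisterInfo &TRI) {
      LiveRegUnits Units(TRI);
      Units.addLiveIns(MBB);
      // available(Reg) is true only when no unit of Reg is set, i.e.
      // neither Reg nor any alias of it is live here.
      return !Units.available(Reg);
    }

Working in register units rather than whole registers is what keeps this per-successor scan cheap enough to repeat after RA.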
+//===----------------------------------------------------------------------===// +namespace { + +class PostRAMachineSinking : public MachineFunctionPass { +public: +  bool runOnMachineFunction(MachineFunction &MF) override; + +  static char ID; +  PostRAMachineSinking() : MachineFunctionPass(ID) {} +  StringRef getPassName() const override { return "PostRA Machine Sink"; } + +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.setPreservesCFG(); +    MachineFunctionPass::getAnalysisUsage(AU); +  } + +  MachineFunctionProperties getRequiredProperties() const override { +    return MachineFunctionProperties().set( +        MachineFunctionProperties::Property::NoVRegs); +  } + +private: +  /// Track which register units have been modified and used. +  LiveRegUnits ModifiedRegUnits, UsedRegUnits; + +  /// Sink Copy instructions unused in the same block close to their uses in +  /// successors. +  bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF, +                     const TargetRegisterInfo *TRI, const TargetInstrInfo *TII); +}; +} // namespace + +char PostRAMachineSinking::ID = 0; +char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID; + +INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink", +                "PostRA Machine Sink", false, false) + +static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg, +                                  const TargetRegisterInfo *TRI) { +  LiveRegUnits LiveInRegUnits(*TRI); +  LiveInRegUnits.addLiveIns(MBB); +  return !LiveInRegUnits.available(Reg); +} + +static MachineBasicBlock * +getSingleLiveInSuccBB(MachineBasicBlock &CurBB, +                      const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs, +                      unsigned Reg, const TargetRegisterInfo *TRI) { +  // Try to find a single sinkable successor in which Reg is live-in. +  MachineBasicBlock *BB = nullptr; +  for (auto *SI : SinkableBBs) { +    if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) { +      // If BB is set here, Reg is live-in to at least two sinkable successors, +      // so quit. +      if (BB) +        return nullptr; +      BB = SI; +    } +  } +  // Reg is not live-in to any sinkable successors. +  if (!BB) +    return nullptr; + +  // Check if any register aliased with Reg is live-in in other successors. 
+  for (auto *SI : CurBB.successors()) {
+    if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI))
+      return nullptr;
+  }
+  return BB;
+}
+
+static MachineBasicBlock *
+getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
+                      const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
+                      ArrayRef<unsigned> DefedRegsInCopy,
+                      const TargetRegisterInfo *TRI) {
+  MachineBasicBlock *SingleBB = nullptr;
+  for (auto DefReg : DefedRegsInCopy) {
+    MachineBasicBlock *BB =
+        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
+    if (!BB || (SingleBB && SingleBB != BB))
+      return nullptr;
+    SingleBB = BB;
+  }
+  return SingleBB;
+}
+
+static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
+                           SmallVectorImpl<unsigned> &UsedOpsInCopy,
+                           LiveRegUnits &UsedRegUnits,
+                           const TargetRegisterInfo *TRI) {
+  for (auto U : UsedOpsInCopy) {
+    MachineOperand &MO = MI->getOperand(U);
+    unsigned SrcReg = MO.getReg();
+    if (!UsedRegUnits.available(SrcReg)) {
+      MachineBasicBlock::iterator NI = std::next(MI->getIterator());
+      for (MachineInstr &UI : make_range(NI, CurBB.end())) {
+        if (UI.killsRegister(SrcReg, TRI)) {
+          UI.clearRegisterKills(SrcReg, TRI);
+          MO.setIsKill(true);
+          break;
+        }
+      }
+    }
+  }
+}
+
+static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
+                         SmallVectorImpl<unsigned> &UsedOpsInCopy,
+                         SmallVectorImpl<unsigned> &DefedRegsInCopy) {
+  for (auto DefReg : DefedRegsInCopy)
+    SuccBB->removeLiveIn(DefReg);
+  for (auto U : UsedOpsInCopy) {
+    unsigned Reg = MI->getOperand(U).getReg();
+    if (!SuccBB->isLiveIn(Reg))
+      SuccBB->addLiveIn(Reg);
+  }
+}
+
+static bool hasRegisterDependency(MachineInstr *MI,
+                                  SmallVectorImpl<unsigned> &UsedOpsInCopy,
+                                  SmallVectorImpl<unsigned> &DefedRegsInCopy,
+                                  LiveRegUnits &ModifiedRegUnits,
+                                  LiveRegUnits &UsedRegUnits) {
+  bool HasRegDependency = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isDef()) {
+      if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
+        HasRegDependency = true;
+        break;
+      }
+      DefedRegsInCopy.push_back(Reg);
+
+      // FIXME: Instead of isUse(), readsReg() would be a better fix here. For
+      // example, we can ignore modifications in a reg with undef. However,
+      // it's not perfectly clear if skipping the internal read is safe in all
+      // other targets.
+    } else if (MO.isUse()) {
+      if (!ModifiedRegUnits.available(Reg)) {
+        HasRegDependency = true;
+        break;
+      }
+      UsedOpsInCopy.push_back(i);
+    }
+  }
+  return HasRegDependency;
+}
+
+bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
+                                         MachineFunction &MF,
+                                         const TargetRegisterInfo *TRI,
+                                         const TargetInstrInfo *TII) {
+  SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
+  // FIXME: For now, we sink only to a successor which has a single predecessor
+  // so that we can directly sink COPY instructions to the successor without
+  // adding any new block or branch instruction.
+  for (MachineBasicBlock *SI : CurBB.successors())
+    if (!SI->livein_empty() && SI->pred_size() == 1)
+      SinkableBBs.insert(SI);
+
+  if (SinkableBBs.empty())
+    return false;
+
+  bool Changed = false;
+
+  // Track which registers have been modified and used between the end of the
+  // block and the current instruction.
+  ModifiedRegUnits.clear();
+  UsedRegUnits.clear();
+
+  for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
+    MachineInstr *MI = &*I;
+    ++I;
+
+    if (MI->isDebugInstr())
+      continue;
+
+    // Do not move any instruction across a function call.
+    if (MI->isCall())
+      return false;
+
+    if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) {
+      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+                                        TRI);
+      continue;
+    }
+
+    // Track the operand index for use in Copy.
+    SmallVector<unsigned, 2> UsedOpsInCopy;
+    // Track the register number defined in Copy.
+    SmallVector<unsigned, 2> DefedRegsInCopy;
+
+    // Don't sink the COPY if it would violate a register dependency.
+    if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+                              ModifiedRegUnits, UsedRegUnits)) {
+      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+                                        TRI);
+      continue;
+    }
+    assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
+           "Unexpected SrcReg or DefReg");
+    MachineBasicBlock *SuccBB =
+        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
+    // Don't sink if we cannot find a single sinkable successor in which Reg
+    // is live-in.
+    if (!SuccBB) {
+      LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+                                        TRI);
+      continue;
+    }
+    assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
+           "Unexpected predecessor");
+
+    // Clear the kill flag if SrcReg is killed between MI and the end of the
+    // block.
+    clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); +    MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); +    performSink(*MI, *SuccBB, InsertPos); +    updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); + +    Changed = true; +    ++NumPostRACopySink; +  } +  return Changed; +} + +bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) { +  bool Changed = false; +  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); +  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + +  ModifiedRegUnits.init(*TRI); +  UsedRegUnits.init(*TRI); +  for (auto &BB : MF) +    Changed |= tryToSinkCopy(BB, MF, TRI, TII); + +  return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index d81c6f8a31e1..b444cd31eba2 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -70,7 +70,7 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {    TRI = ST.getRegisterInfo();    MRI = &MF->getRegInfo();    Loops = &getAnalysis<MachineLoopInfo>(); -  SchedModel.init(ST.getSchedModel(), &ST, TII); +  SchedModel.init(&ST);    BlockInfo.resize(MF->getNumBlockIDs());    ProcResourceCycles.resize(MF->getNumBlockIDs() *                              SchedModel.getNumProcResourceKinds()); @@ -396,8 +396,8 @@ MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {  }  void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { -  DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB) -               << '\n'); +  LLVM_DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB) +                    << '\n');    BlockInfo[MBB->getNumber()].invalidate();    for (unsigned i = 0; i != TS_NumStrategies; ++i)      if (Ensembles[i]) @@ -477,8 +477,8 @@ public:  /// Compute the trace through MBB.  void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { -  DEBUG(dbgs() << "Computing " << getName() << " trace through " -               << printMBBReference(*MBB) << '\n'); +  LLVM_DEBUG(dbgs() << "Computing " << getName() << " trace through " +                    << printMBBReference(*MBB) << '\n');    // Set up loop bounds for the backwards post-order traversal.    LoopBounds Bounds(BlockInfo, MTM.Loops); @@ -486,11 +486,11 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {    Bounds.Downward = false;    Bounds.Visited.clear();    for (auto I : inverse_post_order_ext(MBB, Bounds)) { -    DEBUG(dbgs() << "  pred for " << printMBBReference(*I) << ": "); +    LLVM_DEBUG(dbgs() << "  pred for " << printMBBReference(*I) << ": ");      TraceBlockInfo &TBI = BlockInfo[I->getNumber()];      // All the predecessors have been visited, pick the preferred one.      TBI.Pred = pickTracePred(I); -    DEBUG({ +    LLVM_DEBUG({        if (TBI.Pred)          dbgs() << printMBBReference(*TBI.Pred) << '\n';        else @@ -504,11 +504,11 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {    Bounds.Downward = true;    Bounds.Visited.clear();    for (auto I : post_order_ext(MBB, Bounds)) { -    DEBUG(dbgs() << "  succ for " << printMBBReference(*I) << ": "); +    LLVM_DEBUG(dbgs() << "  succ for " << printMBBReference(*I) << ": ");      TraceBlockInfo &TBI = BlockInfo[I->getNumber()];      // All the successors have been visited, pick the preferred one.      
TBI.Succ = pickTraceSucc(I); -    DEBUG({ +    LLVM_DEBUG({        if (TBI.Succ)          dbgs() << printMBBReference(*TBI.Succ) << '\n';        else @@ -531,8 +531,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {      WorkList.push_back(BadMBB);      do {        const MachineBasicBlock *MBB = WorkList.pop_back_val(); -      DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' ' -                   << getName() << " height.\n"); +      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' ' +                        << getName() << " height.\n");        // Find any MBB predecessors that have MBB as their preferred successor.        // They are the only ones that need to be invalidated.        for (const MachineBasicBlock *Pred : MBB->predecessors()) { @@ -556,8 +556,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {      WorkList.push_back(BadMBB);      do {        const MachineBasicBlock *MBB = WorkList.pop_back_val(); -      DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' ' -                   << getName() << " depth.\n"); +      LLVM_DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' ' +                        << getName() << " depth.\n");        // Find any MBB successors that have MBB as their preferred predecessor.        // They are the only ones that need to be invalidated.        for (const MachineBasicBlock *Succ : MBB->successors()) { @@ -653,7 +653,7 @@ static bool getDataDeps(const MachineInstr &UseMI,                          SmallVectorImpl<DataDep> &Deps,                          const MachineRegisterInfo *MRI) {    // Debug values should not be included in any calculations. -  if (UseMI.isDebugValue()) +  if (UseMI.isDebugInstr())      return false;    bool HasPhysRegs = false; @@ -813,9 +813,9 @@ updateDepth(MachineTraceMetrics::TraceBlockInfo &TBI, const MachineInstr &UseMI,    if (TBI.HasValidInstrHeights) {      // Update critical path length.      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); -    DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI); +    LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);    } else { -    DEBUG(dbgs() << Cycle << '\t' << UseMI); +    LLVM_DEBUG(dbgs() << Cycle << '\t' << UseMI);    }  } @@ -860,13 +860,13 @@ computeInstrDepths(const MachineBasicBlock *MBB) {    // Go through trace blocks in top-down order, stopping after the center block.    while (!Stack.empty()) {      MBB = Stack.pop_back_val(); -    DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n"); +    LLVM_DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n");      TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];      TBI.HasValidInstrDepths = true;      TBI.CriticalPath = 0;      // Print out resource depths here as well. 
-    DEBUG({ +    LLVM_DEBUG({        dbgs() << format("%7u Instructions\n", TBI.InstrDepth);        ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());        for (unsigned K = 0; K != PRDepths.size(); ++K) @@ -1045,12 +1045,12 @@ computeInstrHeights(const MachineBasicBlock *MBB) {    SmallVector<DataDep, 8> Deps;    for (;!Stack.empty(); Stack.pop_back()) {      MBB = Stack.back(); -    DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n"); +    LLVM_DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n");      TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];      TBI.HasValidInstrHeights = true;      TBI.CriticalPath = 0; -    DEBUG({ +    LLVM_DEBUG({        dbgs() << format("%7u Instructions\n", TBI.InstrHeight);        ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());        for (unsigned K = 0; K != PRHeights.size(); ++K) @@ -1081,7 +1081,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {          if (!Deps.empty()) {            // Loop header PHI heights are all 0.            unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0; -          DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI); +          LLVM_DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);            if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,                              MTM.TII))              addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack); @@ -1122,38 +1122,38 @@ computeInstrHeights(const MachineBasicBlock *MBB) {        InstrCycles &MICycles = Cycles[&MI];        MICycles.Height = Cycle;        if (!TBI.HasValidInstrDepths) { -        DEBUG(dbgs() << Cycle << '\t' << MI); +        LLVM_DEBUG(dbgs() << Cycle << '\t' << MI);          continue;        }        // Update critical path length.        TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth); -      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI); +      LLVM_DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);      }      // Update virtual live-in heights. They were added by addLiveIns() with a 0      // height because the final height isn't known until now. -    DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:"); +    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:");      for (LiveInReg &LIR : TBI.LiveIns) {        const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);        LIR.Height = Heights.lookup(DefMI); -      DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height); +      LLVM_DEBUG(dbgs() << ' ' << printReg(LIR.Reg) << '@' << LIR.Height);      }      // Transfer the live regunits to the live-in list.      for (SparseSet<LiveRegUnit>::const_iterator           RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {        TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle)); -      DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI) -                   << '@' << RI->Cycle); +      LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI) << '@' +                        << RI->Cycle);      } -    DEBUG(dbgs() << '\n'); +    LLVM_DEBUG(dbgs() << '\n');      if (!TBI.HasValidInstrDepths)        continue;      // Add live-ins to the critical path length.      
TBI.CriticalPath = std::max(TBI.CriticalPath,                                  computeCrossBlockCriticalPath(TBI)); -    DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n'); +    LLVM_DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');    }  } diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index e0cc2ca9a2a2..d644e41abc5b 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -239,7 +239,8 @@ namespace {      void report(const char *msg, const MachineFunction *MF);      void report(const char *msg, const MachineBasicBlock *MBB);      void report(const char *msg, const MachineInstr *MI); -    void report(const char *msg, const MachineOperand *MO, unsigned MONum); +    void report(const char *msg, const MachineOperand *MO, unsigned MONum, +                LLT MOVRegType = LLT{});      void report_context(const LiveInterval &LI) const;      void report_context(const LiveRange &LR, unsigned VRegUnit, @@ -250,16 +251,16 @@ namespace {      void report_context_liverange(const LiveRange &LR) const;      void report_context_lanemask(LaneBitmask LaneMask) const;      void report_context_vreg(unsigned VReg) const; -    void report_context_vreg_regunit(unsigned VRegOrRegUnit) const; +    void report_context_vreg_regunit(unsigned VRegOrUnit) const;      void verifyInlineAsm(const MachineInstr *MI);      void checkLiveness(const MachineOperand *MO, unsigned MONum);      void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum, -                            SlotIndex UseIdx, const LiveRange &LR, unsigned Reg, +                            SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit,                              LaneBitmask LaneMask = LaneBitmask::getNone());      void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum, -                            SlotIndex DefIdx, const LiveRange &LR, unsigned Reg, +                            SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,                              LaneBitmask LaneMask = LaneBitmask::getNone());      void markReachable(const MachineBasicBlock *MBB); @@ -359,11 +360,15 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {    TRI = MF.getSubtarget().getRegisterInfo();    MRI = &MF.getRegInfo(); -  isFunctionRegBankSelected = MF.getProperties().hasProperty( -      MachineFunctionProperties::Property::RegBankSelected); -  isFunctionSelected = MF.getProperties().hasProperty( -      MachineFunctionProperties::Property::Selected); - +  const bool isFunctionFailedISel = MF.getProperties().hasProperty( +      MachineFunctionProperties::Property::FailedISel); +  isFunctionRegBankSelected = +      !isFunctionFailedISel && +      MF.getProperties().hasProperty( +          MachineFunctionProperties::Property::RegBankSelected); +  isFunctionSelected = !isFunctionFailedISel && +                       MF.getProperties().hasProperty( +                           MachineFunctionProperties::Property::Selected);    LiveVars = nullptr;    LiveInts = nullptr;    LiveStks = nullptr; @@ -486,15 +491,14 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) {    if (Indexes && Indexes->hasIndex(*MI))      errs() << Indexes->getInstructionIndex(*MI) << '\t';    MI->print(errs(), /*SkipOpers=*/true); -  errs() << '\n';  } -void MachineVerifier::report(const char *msg, -                             const MachineOperand *MO, unsigned MONum) { +void MachineVerifier::report(const 
char *msg, const MachineOperand *MO, +                             unsigned MONum, LLT MOVRegType) {    assert(MO);    report(msg, MO->getParent());    errs() << "- operand " << MONum << ":   "; -  MO->print(errs(), TRI); +  MO->print(errs(), MOVRegType, TRI);    errs() << "\n";  } @@ -642,7 +646,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {        !(AsmInfo &&          AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&          BB && isa<SwitchInst>(BB->getTerminator())) && -      !isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) +      !isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))      report("MBB has more than one landing pad successor", MBB);    // Call AnalyzeBranch. If it succeeds, there several more conditions to check. @@ -873,11 +877,11 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {    if (MI->getNumOperands() < MCID.getNumOperands()) {      report("Too few operands", MI);      errs() << MCID.getNumOperands() << " operands expected, but " -        << MI->getNumOperands() << " given.\n"; +           << MI->getNumOperands() << " given.\n";    }    if (MI->isPHI() && MF->getProperties().hasProperty( -          MachineFunctionProperties::Property::NoPHIs)) +                         MachineFunctionProperties::Property::NoPHIs))      report("Found PHI instruction with NoPHIs property set", MI);    // Check the tied operands. @@ -886,7 +890,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {    // Check the MachineMemOperands for basic consistency.    for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), -       E = MI->memoperands_end(); I != E; ++I) { +                                  E = MI->memoperands_end(); +       I != E; ++I) {      if ((*I)->isLoad() && !MI->mayLoad())        report("Missing mayLoad flag", MI);      if ((*I)->isStore() && !MI->mayStore()) @@ -897,7 +902,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {    // Other instructions must have one, unless they are inside a bundle.    if (LiveInts) {      bool mapped = !LiveInts->isNotInMIMap(*MI); -    if (MI->isDebugValue()) { +    if (MI->isDebugInstr()) {        if (mapped)          report("Debug instruction has a slot index", MI);      } else if (MI->isInsideBundle()) { @@ -909,32 +914,42 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {      }    } -  // Check types.    if (isPreISelGenericOpcode(MCID.getOpcode())) {      if (isFunctionSelected)        report("Unexpected generic instruction in a Selected function", MI); -    // Generic instructions specify equality constraints between some -    // of their operands. Make sure these are consistent. +    // Check types.      SmallVector<LLT, 4> Types; -    for (unsigned i = 0; i < MCID.getNumOperands(); ++i) { -      if (!MCID.OpInfo[i].isGenericType()) +    for (unsigned I = 0; I < MCID.getNumOperands(); ++I) { +      if (!MCID.OpInfo[I].isGenericType())          continue; -      size_t TypeIdx = MCID.OpInfo[i].getGenericTypeIndex(); +      // Generic instructions specify type equality constraints between some of +      // their operands. Make sure these are consistent. 
+      size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex();
       Types.resize(std::max(TypeIdx + 1, Types.size()));
-      LLT OpTy = MRI->getType(MI->getOperand(i).getReg());
-      if (Types[TypeIdx].isValid() && Types[TypeIdx] != OpTy)
-        report("type mismatch in generic instruction", MI);
-      Types[TypeIdx] = OpTy;
+      const MachineOperand *MO = &MI->getOperand(I);
+      LLT OpTy = MRI->getType(MO->getReg());
+      // Don't report a type mismatch if there is no actual mismatch, only a
+      // type missing, to reduce noise:
+      if (OpTy.isValid()) {
+        // Only the first valid type for a type index will be printed: don't
+        // overwrite it later so it's always clear which type was expected:
+        if (!Types[TypeIdx].isValid())
+          Types[TypeIdx] = OpTy;
+        else if (Types[TypeIdx] != OpTy)
+          report("Type mismatch in generic instruction", MO, I, OpTy);
+      } else {
+        // Generic instructions must have types attached to their operands.
+        report("Generic instruction is missing a virtual register type", MO, I);
+      }
     }
-  }
-  // Generic opcodes must not have physical register operands.
-  if (isPreISelGenericOpcode(MCID.getOpcode())) {
-    for (auto &Op : MI->operands()) {
-      if (Op.isReg() && TargetRegisterInfo::isPhysicalRegister(Op.getReg()))
-        report("Generic instruction cannot have physical register", MI);
+    // Generic opcodes must not have physical register operands.
+    for (unsigned I = 0; I < MI->getNumOperands(); ++I) {
+      const MachineOperand *MO = &MI->getOperand(I);
+      if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg()))
+        report("Generic instruction cannot have physical register", MO, I);
     }
   }
@@ -971,6 +986,88 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
              MI);
     break;
   }
+  case TargetOpcode::G_SEXT:
+  case TargetOpcode::G_ZEXT:
+  case TargetOpcode::G_ANYEXT:
+  case TargetOpcode::G_TRUNC:
+  case TargetOpcode::G_FPEXT:
+  case TargetOpcode::G_FPTRUNC: {
+    // Number of operands and presence of types is already checked (and
+    // reported in case of any issues), so no need to report them again. As
+    // we're trying to report as many issues as possible at once, however, the
+    // instructions aren't guaranteed to have the right number of operands or
+    // types attached to them at this point.
+    assert(MCID.getNumOperands() == 2 && "Expected 2 operands G_*{EXT,TRUNC}");
+    if (MI->getNumOperands() < MCID.getNumOperands())
+      break;
+    LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+    LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+    if (!DstTy.isValid() || !SrcTy.isValid())
+      break;
+
+    LLT DstElTy = DstTy.isVector() ? DstTy.getElementType() : DstTy;
+    LLT SrcElTy = SrcTy.isVector() ? SrcTy.getElementType() : SrcTy;
+    if (DstElTy.isPointer() || SrcElTy.isPointer())
+      report("Generic extend/truncate can not operate on pointers", MI);
+
+    if (DstTy.isVector() != SrcTy.isVector()) {
+      report("Generic extend/truncate must be all-vector or all-scalar", MI);
+      // Generally we try to report as many issues as possible at once, but in
+      // this case it's not clear what we should be comparing the size of the
+      // scalar with: the size of the whole vector or its lane. Instead of
+      // making an arbitrary choice and emitting a not-so-helpful message, let's
+      // avoid the extra noise and stop here.
+      break;
+    }
+    if (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements())
+      report("Generic vector extend/truncate must preserve number of lanes",
+             MI);
+    unsigned DstSize = DstElTy.getSizeInBits();
+    unsigned SrcSize = SrcElTy.getSizeInBits();
+    switch (MI->getOpcode()) {
+    default:
+      if (DstSize <= SrcSize)
+        report("Generic extend has destination type no larger than source", MI);
+      break;
+    case TargetOpcode::G_TRUNC:
+    case TargetOpcode::G_FPTRUNC:
+      if (DstSize >= SrcSize)
+        report("Generic truncate has destination type no smaller than source",
+               MI);
+      break;
+    }
+    break;
+  }
+  case TargetOpcode::COPY: {
+    if (foundErrors)
+      break;
+    const MachineOperand &DstOp = MI->getOperand(0);
+    const MachineOperand &SrcOp = MI->getOperand(1);
+    LLT DstTy = MRI->getType(DstOp.getReg());
+    LLT SrcTy = MRI->getType(SrcOp.getReg());
+    if (SrcTy.isValid() && DstTy.isValid()) {
+      // If both types are valid, check that the types are the same.
+      if (SrcTy != DstTy) {
+        report("Copy Instruction is illegal with mismatching types", MI);
+        errs() << "Def = " << DstTy << ", Src = " << SrcTy << "\n";
+      }
+    }
+    if (SrcTy.isValid() || DstTy.isValid()) {
+      // If one of them has a valid type, let's just check they have the same
+      // size.
+      unsigned SrcSize = TRI->getRegSizeInBits(SrcOp.getReg(), *MRI);
+      unsigned DstSize = TRI->getRegSizeInBits(DstOp.getReg(), *MRI);
+      assert(SrcSize && "Expecting size here");
+      assert(DstSize && "Expecting size here");
+      if (SrcSize != DstSize)
+        if (!DstOp.getSubReg() && !SrcOp.getSubReg()) {
+          report("Copy Instruction is illegal with mismatching sizes", MI);
+          errs() << "Def Size = " << DstSize << ", Src Size = " << SrcSize
+                 << "\n";
+        }
+    }
+    break;
+  }
   case TargetOpcode::STATEPOINT:
     if (!MI->getOperand(StatepointOpers::IDPos).isImm() ||
         !MI->getOperand(StatepointOpers::NBytesPos).isImm() ||
@@ -1101,12 +1198,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
           }
         }
       }
-      if (MO->isRenamable() &&
-          ((MO->isDef() && MI->hasExtraDefRegAllocReq()) ||
-           (MO->isUse() && MI->hasExtraSrcRegAllocReq()))) {
-        report("Illegal isRenamable setting for opcode with extra regalloc "
-               "requirements",
-               MO, MONum);
+      if (MO->isRenamable()) {
+        if (MRI->isReserved(Reg)) {
+          report("isRenamable set on reserved register", MO, MONum);
+          return;
+        }
+      }
+      if (MI->isDebugValue() && MO->isUse() && !MO->isDebug()) {
+        report("Use-reg is not IsDebug in a DBG_VALUE", MO, MONum);
         return;
       }
     } else {
diff --git a/contrib/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm/lib/CodeGen/MacroFusion.cpp
index e7f426c469a0..62dadbba0c1a 100644
--- a/contrib/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/contrib/llvm/lib/CodeGen/MacroFusion.cpp
@@ -66,11 +66,11 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
     if (SI.getSUnit() == &FirstSU)
       SI.setLatency(0);
-  DEBUG(dbgs() << "Macro fuse: ";
-        FirstSU.print(dbgs(), &DAG); dbgs() << " - ";
-        SecondSU.print(dbgs(), &DAG); dbgs() << " /  ";
-        dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " <<
DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; ); +  LLVM_DEBUG( +      dbgs() << "Macro fuse: "; FirstSU.print(dbgs(), &DAG); dbgs() << " - "; +      SecondSU.print(dbgs(), &DAG); dbgs() << " /  "; +      dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " +             << DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n';);    // Make data dependencies from the FirstSU also dependent on the SecondSU to    // prevent them from being scheduled between the FirstSU and the SecondSU. @@ -80,24 +80,32 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,        if (SI.isWeak() || isHazard(SI) ||            SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU))          continue; -      DEBUG(dbgs() << "  Bind "; -            SecondSU.print(dbgs(), &DAG); dbgs() << " - "; -            SU->print(dbgs(), &DAG); dbgs() << '\n';); +      LLVM_DEBUG(dbgs() << "  Bind "; SecondSU.print(dbgs(), &DAG); +                 dbgs() << " - "; SU->print(dbgs(), &DAG); dbgs() << '\n';);        DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial));      }    // Make the FirstSU also dependent on the dependencies of the SecondSU to    // prevent them from being scheduled between the FirstSU and the SecondSU. -  if (&FirstSU != &DAG.EntrySU) +  if (&FirstSU != &DAG.EntrySU) {      for (const SDep &SI : SecondSU.Preds) {        SUnit *SU = SI.getSUnit();        if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU))          continue; -      DEBUG(dbgs() << "  Bind "; -            SU->print(dbgs(), &DAG); dbgs() << " - "; -            FirstSU.print(dbgs(), &DAG); dbgs() << '\n';); +      LLVM_DEBUG(dbgs() << "  Bind "; SU->print(dbgs(), &DAG); dbgs() << " - "; +                 FirstSU.print(dbgs(), &DAG); dbgs() << '\n';);        DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial));      } +    // ExitSU comes last by design, which acts like an implicit dependency +    // between ExitSU and any bottom root in the graph. We should transfer +    // this to FirstSU as well. +    if (&SecondSU == &DAG.ExitSU) { +      for (SUnit &SU : DAG.SUnits) { +        if (SU.Succs.empty()) +          DAG.addEdge(&FirstSU, SDep(&SU, SDep::Artificial)); +      } +    } +  }    ++NumFused;    return true; @@ -105,7 +113,7 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,  namespace { -/// \brief Post-process the DAG to create cluster edges between instrs that may +/// Post-process the DAG to create cluster edges between instrs that may  /// be fused by the processor into a single operation.  class MacroFusion : public ScheduleDAGMutation {    ShouldSchedulePredTy shouldScheduleAdjacent; @@ -135,7 +143,7 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {      scheduleAdjacentImpl(*DAG, DAG->ExitSU);  } -/// \brief Implement the fusion of instr pairs in the scheduling DAG, +/// Implement the fusion of instr pairs in the scheduling DAG,  /// anchored at the instr in AnchorSU..  
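For orientation before the implementation that follows: a target consumes this machinery by handing a fusion predicate to createMacroFusionDAGMutation() and registering the resulting mutation with its scheduler. A minimal sketch, assuming the MacroFusion.h interface of this period; the predicate body and the addFusionMutation helper name are illustrative, not from this patch. By convention a null FirstMI asks whether SecondMI could be the trailing half of any fused pair.

    #include "llvm/CodeGen/MacroFusion.h"
    #include "llvm/CodeGen/MachineScheduler.h"
    using namespace llvm;

    // Hypothetical predicate: fuse a compare with the conditional branch
    // that consumes it, the classic cmp+branch macro-op pair.
    static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
                                       const TargetSubtargetInfo &STI,
                                       const MachineInstr *FirstMI,
                                       const MachineInstr &SecondMI) {
      if (!SecondMI.isConditionalBranch())
        return false;
      // Null FirstMI: "could SecondMI end some fused pair?"
      return !FirstMI || FirstMI->isCompare();
    }

    // Typically done in the target's createMachineScheduler() hook.
    static void addFusionMutation(ScheduleDAGMI *DAG) {
      DAG->addMutation(createMacroFusionDAGMutation(shouldScheduleAdjacent));
    }

Expressing fusion as artificial DAG edges, the way fuseInstructionPair() above does, keeps it a pure scheduling constraint rather than a separate rewriting pass.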
bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {    const MachineInstr &AnchorMI = *AnchorSU.getInstr(); diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp index 8972867ba083..befa8422d399 100644 --- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -45,7 +45,7 @@ namespace {        initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());      } -    bool runOnMachineFunction(MachineFunction &MF) override; +    bool runOnMachineFunction(MachineFunction &Fn) override;      void getAnalysisUsage(AnalysisUsage &AU) const override {        AU.setPreservesCFG(); diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index 54c5a940275d..7a5c20000066 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -75,7 +75,7 @@ namespace {        initializePHIEliminationPass(*PassRegistry::getPassRegistry());      } -    bool runOnMachineFunction(MachineFunction &Fn) override; +    bool runOnMachineFunction(MachineFunction &MF) override;      void getAnalysisUsage(AnalysisUsage &AU) const override;    private: @@ -91,7 +91,7 @@ namespace {      /// register which is used in a PHI node. We map that to the BB the      /// vreg is coming from. This is used later to determine when the vreg      /// is killed in the BB. -    void analyzePHINodes(const MachineFunction& Fn); +    void analyzePHINodes(const MachineFunction& MF);      /// Split critical edges where necessary for good coalescer performance.      bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, @@ -270,7 +270,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,        IncomingReg = entry;        reusedIncoming = true;        ++NumReused; -      DEBUG(dbgs() << "Reusing " << printReg(IncomingReg) << " for " << *MPhi); +      LLVM_DEBUG(dbgs() << "Reusing " << printReg(IncomingReg) << " for " +                        << *MPhi);      } else {        const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);        entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); @@ -295,9 +296,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,        // AfterPHIsIt, so it appears before the current PHICopy.        
if (reusedIncoming)          if (MachineInstr *OldKill = VI.findKill(&MBB)) { -          DEBUG(dbgs() << "Remove old kill from " << *OldKill); +          LLVM_DEBUG(dbgs() << "Remove old kill from " << *OldKill);            LV->removeVirtualRegisterKilled(IncomingReg, *OldKill); -          DEBUG(MBB.dump()); +          LLVM_DEBUG(MBB.dump());          }        // Add information to LiveVariables to know that the incoming value is @@ -452,7 +453,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,            KillInst = FirstTerm;            while (KillInst != opBlock.begin()) {              --KillInst; -            if (KillInst->isDebugValue()) +            if (KillInst->isDebugInstr())                continue;              if (KillInst->readsRegister(SrcReg))                break; @@ -512,7 +513,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,                KillInst = FirstTerm;                while (KillInst != opBlock.begin()) {                  --KillInst; -                if (KillInst->isDebugValue()) +                if (KillInst->isDebugInstr())                    continue;                  if (KillInst->readsRegister(SrcReg))                    break; @@ -593,9 +594,9 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,        if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit)          continue;        if (ShouldSplit) { -        DEBUG(dbgs() << printReg(Reg) << " live-out before critical edge " -                     << printMBBReference(*PreMBB) << " -> " -                     << printMBBReference(MBB) << ": " << *BBI); +        LLVM_DEBUG(dbgs() << printReg(Reg) << " live-out before critical edge " +                          << printMBBReference(*PreMBB) << " -> " +                          << printMBBReference(MBB) << ": " << *BBI);        }        // If Reg is not live-in to MBB, it means it must be live-in to some @@ -610,10 +611,12 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,        // Check for a loop exiting edge.        
if (!ShouldSplit && CurLoop != PreLoop) {
-        DEBUG({
+        LLVM_DEBUG({
           dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
-          if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
-          if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
+          if (PreLoop)
+            dbgs() << "PreLoop: " << *PreLoop;
+          if (CurLoop)
+            dbgs() << "CurLoop: " << *CurLoop;
         });
         // This edge could be entering a loop, exiting a loop, or it could be
         // both: Jumping directly form one loop to the header of a sibling
@@ -624,7 +627,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
       if (!ShouldSplit && !SplitAllCriticalEdges)
         continue;
       if (!PreMBB->SplitCriticalEdge(&MBB, *this)) {
-        DEBUG(dbgs() << "Failed to split critical edge.\n");
+        LLVM_DEBUG(dbgs() << "Failed to split critical edge.\n");
         continue;
       }
       Changed = true;
diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
index ff8680a0540d..bc3f2a6e9b5a 100644
--- a/contrib/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
@@ -30,7 +30,7 @@ static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
                     TargetMachine::CodeGenFileType FileType) {
   std::unique_ptr<TargetMachine> TM = TMFactory();
   legacy::PassManager CodeGenPasses;
-  if (TM->addPassesToEmitFile(CodeGenPasses, OS, FileType))
+  if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType))
     report_fatal_error("Failed to setup codegen");
   CodeGenPasses.run(*M);
 }
@@ -44,7 +44,7 @@ std::unique_ptr<Module> llvm::splitCodeGen(
   if (OSs.size() == 1) {
     if (!BCOSs.empty())
-      WriteBitcodeToFile(M.get(), *BCOSs[0]);
+      WriteBitcodeToFile(*M, *BCOSs[0]);
     codegen(M.get(), *OSs[0], TMFactory, FileType);
     return M;
   }
@@ -66,7 +66,7 @@ std::unique_ptr<Module> llvm::splitCodeGen(
           // FIXME: Provide a more direct way to do this in LLVM.
           SmallString<0> BC;
           raw_svector_ostream BCOS(BC);
-          WriteBitcodeToFile(MPart.get(), BCOS);
+          WriteBitcodeToFile(*MPart, BCOS);
           if (!BCOSs.empty()) {
             BCOSs[ThreadCount]->write(BC.begin(), BC.size());
diff --git a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
index 0957705b19bb..afb4b0a7e174 100644
--- a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -49,6 +49,7 @@ static bool doesNotGeneratecode(const MachineInstr &MI) {
   case TargetOpcode::EH_LABEL:
   case TargetOpcode::GC_LABEL:
   case TargetOpcode::DBG_VALUE:
+  case TargetOpcode::DBG_LABEL:
     return true;
   }
 }
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 1320f9985553..1d058ccfb633 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -202,7 +202,7 @@ namespace {
     bool foldImmediate(MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs,
                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
-    /// \brief Finds recurrence cycles, but only ones that formulated around
+    /// Finds recurrence cycles, but only ones that are formulated around
     /// a def operand and a use operand that are tied.
If there is a use      /// operand commutable with the tied use operand, find recurrence cycle      /// along that operand as well. @@ -210,7 +210,7 @@ namespace {                                const SmallSet<unsigned, 2> &TargetReg,                                RecurrenceCycle &RC); -    /// \brief If copy instruction \p MI is a virtual register copy, track it in +    /// If copy instruction \p MI is a virtual register copy, track it in      /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was      /// previously seen as a copy, replace the uses of this copy with the      /// previously seen copy's destination register. @@ -221,7 +221,7 @@ namespace {      /// Is the register \p Reg a non-allocatable physical register?      bool isNAPhysCopy(unsigned Reg); -    /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical +    /// If copy instruction \p MI is a non-allocatable virtual<->physical      /// register copy, track it in the \p NAPhysToVirtMIs map. If this      /// non-allocatable physical register was previously copied to a virtual      /// registered and hasn't been clobbered, the virt->phys copy can be @@ -232,7 +232,7 @@ namespace {      bool isLoadFoldable(MachineInstr &MI,                          SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); -    /// \brief Check whether \p MI is understood by the register coalescer +    /// Check whether \p MI is understood by the register coalescer      /// but may require some rewriting.      bool isCoalescableCopy(const MachineInstr &MI) {        // SubregToRegs are not interesting, because they are already register @@ -242,7 +242,7 @@ namespace {                                MI.isExtractSubreg()));      } -    /// \brief Check whether \p MI is a copy like instruction that is +    /// Check whether \p MI is a copy like instruction that is      /// not recognized by the register coalescer.      bool isUncoalescableCopy(const MachineInstr &MI) {        return MI.isBitcast() || @@ -345,7 +345,7 @@ namespace {      }    }; -  /// \brief Helper class to track the possible sources of a value defined by +  /// Helper class to track the possible sources of a value defined by    /// a (chain of) copy related instructions.    /// Given a definition (instruction and definition index), this class    /// follows the use-def chain to find successive suitable sources. @@ -425,7 +425,7 @@ namespace {        }      } -    /// \brief Following the use-def chain, get the next available source +    /// Following the use-def chain, get the next available source      /// for the tracked value.      /// \return A ValueTrackerResult containing a set of registers      /// and sub registers with tracked values. A ValueTrackerResult with @@ -646,7 +646,7 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {    return TII->optimizeCondBranch(MI);  } -/// \brief Try to find the next source that share the same register file +/// Try to find the next source that share the same register file  /// for the value defined by \p Reg and \p SubReg.  /// When true is returned, the \p RewriteMap can be used by the client to  /// retrieve all Def -> Use along the way up to the next source. Any found @@ -696,7 +696,8 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,          // An existent entry with multiple sources is a PHI cycle we must avoid.          // Otherwise it's an entry with a valid next source we already found.          
if (CurSrcRes.getNumSources() > 1) { -          DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n"); +          LLVM_DEBUG(dbgs() +                     << "findNextSource: found PHI cycle, aborting...\n");            return false;          }          break; @@ -709,7 +710,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,        if (NumSrcs > 1) {          PHICount++;          if (PHICount >= RewritePHILimit) { -          DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); +          LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");            return false;          } @@ -746,7 +747,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,    return CurSrcPair.Reg != Reg;  } -/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are +/// Insert a PHI instruction with incoming edges \p SrcRegs that are  /// guaranteed to have the same register class. This is necessary whenever we  /// successfully traverse a PHI instruction and find suitable sources coming  /// from its edges. By inserting a new PHI, we provide a rewritten PHI def @@ -791,7 +792,7 @@ public:    Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}    virtual ~Rewriter() {} -  /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and +  /// Get the next rewritable source (SrcReg, SrcSubReg) and    /// the related value that it affects (DstReg, DstSubReg).    /// A source is considered rewritable if its register class and the    /// register class of the related DstReg may not be register @@ -859,7 +860,7 @@ public:    }  }; -/// \brief Helper class to rewrite uncoalescable copy like instructions +/// Helper class to rewrite uncoalescable copy like instructions  /// into new COPY (coalescable friendly) instructions.  class UncoalescableRewriter : public Rewriter {    unsigned NumDefs;  ///< Number of defs in the bitcast. @@ -1101,7 +1102,7 @@ static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) {    }  } -/// \brief Given a \p Def.Reg and Def.SubReg  pair, use \p RewriteMap to find +/// Given a \p Def.Reg and Def.SubReg  pair, use \p RewriteMap to find  /// the new source to use for rewrite. If \p HandleMultipleSources is true and  /// multiple sources for a given \p Def are found along the way, we found a  /// PHI instructions that needs to be rewritten. @@ -1143,9 +1144,9 @@ getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,      // Build the new PHI node and return its def register as the new source.      MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());      MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI); -    DEBUG(dbgs() << "-- getNewSource\n"); -    DEBUG(dbgs() << "   Replacing: " << OrigPHI); -    DEBUG(dbgs() << "        With: " << NewPHI); +    LLVM_DEBUG(dbgs() << "-- getNewSource\n"); +    LLVM_DEBUG(dbgs() << "   Replacing: " << OrigPHI); +    LLVM_DEBUG(dbgs() << "        With: " << NewPHI);      const MachineOperand &MODef = NewPHI.getOperand(0);      return RegSubRegPair(MODef.getReg(), MODef.getSubReg());    } @@ -1213,7 +1214,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {    return Changed;  } -/// \brief Rewrite the source found through \p Def, by using the \p RewriteMap +/// Rewrite the source found through \p Def, by using the \p RewriteMap  /// and create a new COPY instruction. More info about RewriteMap in  /// PeepholeOptimizer::findNextSource. 
Right now this is only used to handle  /// Uncoalescable copies, since they are copy like instructions that aren't @@ -1241,9 +1242,9 @@ PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,      NewCopy->getOperand(0).setIsUndef();    } -  DEBUG(dbgs() << "-- RewriteSource\n"); -  DEBUG(dbgs() << "   Replacing: " << CopyLike); -  DEBUG(dbgs() << "        With: " << *NewCopy); +  LLVM_DEBUG(dbgs() << "-- RewriteSource\n"); +  LLVM_DEBUG(dbgs() << "   Replacing: " << CopyLike); +  LLVM_DEBUG(dbgs() << "        With: " << *NewCopy);    MRI->replaceRegWith(Def.Reg, NewVReg);    MRI->clearKillFlags(NewVReg); @@ -1254,7 +1255,7 @@ PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,    return *NewCopy;  } -/// \brief Optimize copy-like instructions to create +/// Optimize copy-like instructions to create  /// register coalescer friendly instruction.  /// The optimization tries to kill-off the \p MI by looking  /// through a chain of copies to find a source that has a compatible @@ -1462,7 +1463,8 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(    if (PrevCopy == NAPhysToVirtMIs.end()) {      // We can't remove the copy: there was an intervening clobber of the      // non-allocatable physical register after the copy to virtual. -    DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << MI); +    LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " +                      << MI);      return false;    } @@ -1470,7 +1472,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(    if (PrevDstReg == SrcReg) {      // Remove the virt->phys copy: we saw the virtual register definition, and      // the non-allocatable physical register's state hasn't changed since then. -    DEBUG(dbgs() << "NAPhysCopy: erasing " << MI); +    LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);      ++NumNAPhysCopies;      return true;    } @@ -1479,7 +1481,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(    // register get a copy of the non-allocatable physical register, and we only    // track one such copy. Avoid getting confused by this new non-allocatable    // physical register definition, and remove it from the tracked copies. -  DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI); +  LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);    NAPhysToVirtMIs.erase(PrevCopy);    return false;  } @@ -1575,15 +1577,15 @@ bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {    if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {      // Commutes operands of instructions in RC if necessary so that the copy to      // be generated from PHI can be coalesced. 
-    DEBUG(dbgs() << "Optimize recurrence chain from " << PHI); +    LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);      for (auto &RI : RC) { -      DEBUG(dbgs() << "\tInst: " << *(RI.getMI())); +      LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));        auto CP = RI.getCommutePair();        if (CP) {          Changed = true;          TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,                                  (*CP).second); -        DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI())); +        LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));        }      }    } @@ -1595,8 +1597,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {    if (skipFunction(MF.getFunction()))      return false; -  DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); -  DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); +  LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');    if (DisablePeephole)      return false; @@ -1643,8 +1645,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {        ++MII;        LocalMIs.insert(MI); -      // Skip debug values. They should not affect this peephole optimization. -      if (MI->isDebugValue()) +      // Skip debug instructions. They should not affect this peephole optimization. +      if (MI->isDebugInstr())            continue;        if (MI->isPosition()) @@ -1667,7 +1669,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {                if (Def != NAPhysToVirtMIs.end()) {                  // A new definition of the non-allocatable physical register                  // invalidates previous copies. -                DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI); +                LLVM_DEBUG(dbgs() +                           << "NAPhysCopy: invalidating because of " << *MI);                  NAPhysToVirtMIs.erase(Def);                }              } @@ -1676,7 +1679,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {              for (auto &RegMI : NAPhysToVirtMIs) {                unsigned Def = RegMI.first;                if (MachineOperand::clobbersPhysReg(RegMask, Def)) { -                DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI); +                LLVM_DEBUG(dbgs() +                           << "NAPhysCopy: invalidating because of " << *MI);                  NAPhysToVirtMIs.erase(Def);                }              } @@ -1692,7 +1696,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {          // don't know what's correct anymore.          //          // FIXME: handle explicit asm clobbers. -        DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI); +        LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " +                          << *MI);          NAPhysToVirtMIs.clear();        } @@ -1768,8 +1773,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {                      TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {                // Update LocalMIs since we replaced MI with FoldMI and deleted                // DefMI. 
-              DEBUG(dbgs() << "Replacing: " << *MI); -              DEBUG(dbgs() << "     With: " << *FoldMI); +              LLVM_DEBUG(dbgs() << "Replacing: " << *MI); +              LLVM_DEBUG(dbgs() << "     With: " << *FoldMI);                LocalMIs.erase(MI);                LocalMIs.erase(DefMI);                LocalMIs.insert(FoldMI); @@ -1791,7 +1796,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {        // the load candidates.  Note: We might be able to fold *into* this        // instruction, so this needs to be after the folding logic.        if (MI->isLoadFoldBarrier()) { -        DEBUG(dbgs() << "Encountered load fold barrier on " << *MI); +        LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);          FoldAsLoadDefCandidates.clear();        }      } diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 5d86faafdd85..215da630caf4 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -38,6 +38,7 @@  #include "llvm/CodeGen/TargetPassConfig.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" @@ -242,11 +243,11 @@ void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,  /// Print the schedule before exiting the region.  void SchedulePostRATDList::exitRegion() { -  DEBUG({ -      dbgs() << "*** Final schedule ***\n"; -      dumpSchedule(); -      dbgs() << '\n'; -    }); +  LLVM_DEBUG({ +    dbgs() << "*** Final schedule ***\n"; +    dumpSchedule(); +    dbgs() << '\n'; +  });    ScheduleDAGInstrs::exitRegion();  } @@ -308,7 +309,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {           : TargetSubtargetInfo::ANTIDEP_NONE);    } -  DEBUG(dbgs() << "PostRAScheduler\n"); +  LLVM_DEBUG(dbgs() << "PostRAScheduler\n");    SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,                                   CriticalPathRCs); @@ -412,13 +413,12 @@ void SchedulePostRATDList::schedule() {    postprocessDAG(); -  DEBUG(dbgs() << "********** List Scheduling **********\n"); -  DEBUG( -    for (const SUnit &SU : SUnits) { -      SU.dumpAll(this); -      dbgs() << '\n'; -    } -  ); +  LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n"); +  LLVM_DEBUG(for (const SUnit &SU +                  : SUnits) { +    SU.dumpAll(this); +    dbgs() << '\n'; +  });    AvailableQueue.initNodes(SUnits);    ListScheduleTopDown(); @@ -501,8 +501,8 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {  /// count of its successors. If a successor pending count is zero, add it to  /// the Available queue.  void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { -  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); -  DEBUG(SU->dump(this)); +  LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); +  LLVM_DEBUG(SU->dump(this));    Sequence.push_back(SU);    assert(CurCycle >= SU->getDepth() && @@ -516,7 +516,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {  /// emitNoop - Add a noop to the current instruction sequence.  
void SchedulePostRATDList::emitNoop(unsigned CurCycle) { -  DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); +  LLVM_DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');    HazardRec->EmitNoop();    Sequence.push_back(nullptr);   // NULL here means noop    ++NumNoops; @@ -568,7 +568,8 @@ void SchedulePostRATDList::ListScheduleTopDown() {          MinDepth = PendingQueue[i]->getDepth();      } -    DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); +    LLVM_DEBUG(dbgs() << "\n*** Examining Available\n"; +               AvailableQueue.dump(this));      SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;      bool HasNoopHazards = false; @@ -604,7 +605,8 @@ void SchedulePostRATDList::ListScheduleTopDown() {      // non-preferred node.      if (NotPreferredSUnit) {        if (!FoundSUnit) { -        DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n"); +        LLVM_DEBUG( +            dbgs() << "*** Will schedule a non-preferred instruction...\n");          FoundSUnit = NotPreferredSUnit;        } else {          AvailableQueue.push(NotPreferredSUnit); @@ -631,19 +633,20 @@ void SchedulePostRATDList::ListScheduleTopDown() {        HazardRec->EmitInstruction(FoundSUnit);        CycleHasInsts = true;        if (HazardRec->atIssueLimit()) { -        DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n'); +        LLVM_DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle +                          << '\n');          HazardRec->AdvanceCycle();          ++CurCycle;          CycleHasInsts = false;        }      } else {        if (CycleHasInsts) { -        DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n'); +        LLVM_DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');          HazardRec->AdvanceCycle();        } else if (!HasNoopHazards) {          // Otherwise, we have a pipeline stall, but no other problem,          // just advance the current cycle and try again. 
-        DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n'); +        LLVM_DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');          HazardRec->AdvanceCycle();          ++NumStalls;        } else { diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index 48b48c5f6499..7e9b4af12ee9 100644 --- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -44,7 +44,7 @@ public:    void getAnalysisUsage(AnalysisUsage &au) const override; -  bool runOnMachineFunction(MachineFunction &fn) override; +  bool runOnMachineFunction(MachineFunction &MF) override;  };  } // end anonymous namespace @@ -73,7 +73,7 @@ bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {  }  void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { -  DEBUG(dbgs() << "Processing " << *MI); +  LLVM_DEBUG(dbgs() << "Processing " << *MI);    unsigned Reg = MI->getOperand(0).getReg();    if (TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -84,7 +84,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {        MachineInstr *UserMI = MO.getParent();        if (!canTurnIntoImplicitDef(UserMI))          continue; -      DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI); +      LLVM_DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI);        UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));        WorkList.insert(UserMI);      } @@ -116,7 +116,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {    // If we found the using MI, we can erase the IMPLICIT_DEF.    if (Found) { -    DEBUG(dbgs() << "Physreg user: " << *UserMI); +    LLVM_DEBUG(dbgs() << "Physreg user: " << *UserMI);      MI->eraseFromParent();      return;    } @@ -125,15 +125,15 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {    // Leave the physreg IMPLICIT_DEF, but trim any extra operands.    for (unsigned i = MI->getNumOperands() - 1; i; --i)      MI->RemoveOperand(i); -  DEBUG(dbgs() << "Keeping physreg: " << *MI); +  LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI);  }  /// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into  /// <undef> operands.  bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { -  DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" -               << "********** Function: " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" +                    << "********** Function: " << MF.getName() << '\n');    bool Changed = false; @@ -154,8 +154,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {      if (WorkList.empty())        continue; -    DEBUG(dbgs() << printMBBReference(*MFI) << " has " << WorkList.size() -                 << " implicit defs.\n"); +    LLVM_DEBUG(dbgs() << printMBBReference(*MFI) << " has " << WorkList.size() +                      << " implicit defs.\n");      Changed = true;      // Drain the WorkList to recursively process any new implicit defs. 
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index a8d8ad8ac7dc..fc62c8caf59e 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -38,7 +38,6 @@  #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/CodeGen/StackProtector.h"  #include "llvm/CodeGen/TargetFrameLowering.h"  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetOpcodes.h" @@ -90,7 +89,7 @@ public:    /// runOnMachineFunction - Insert prolog/epilog code and replace abstract    /// frame indexes with appropriate references. -  bool runOnMachineFunction(MachineFunction &Fn) override; +  bool runOnMachineFunction(MachineFunction &MF) override;  private:    RegScavenger *RS; @@ -117,15 +116,15 @@ private:    // Emit remarks.    MachineOptimizationRemarkEmitter *ORE = nullptr; -  void calculateCallFrameInfo(MachineFunction &Fn); -  void calculateSaveRestoreBlocks(MachineFunction &Fn); +  void calculateCallFrameInfo(MachineFunction &MF); +  void calculateSaveRestoreBlocks(MachineFunction &MF);    void spillCalleeSavedRegs(MachineFunction &MF); -  void calculateFrameObjectOffsets(MachineFunction &Fn); -  void replaceFrameIndices(MachineFunction &Fn); -  void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, +  void calculateFrameObjectOffsets(MachineFunction &MF); +  void replaceFrameIndices(MachineFunction &MF); +  void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,                             int &SPAdj); -  void insertPrologEpilogCode(MachineFunction &Fn); +  void insertPrologEpilogCode(MachineFunction &MF);  };  } // end anonymous namespace @@ -143,7 +142,6 @@ INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false,                        false)  INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)  INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(StackProtector)  INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)  INITIALIZE_PASS_END(PEI, DEBUG_TYPE,                      "Prologue/Epilogue Insertion & Frame Finalization", false, @@ -160,7 +158,6 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {    AU.setPreservesCFG();    AU.addPreserved<MachineLoopInfo>();    AU.addPreserved<MachineDominatorTree>(); -  AU.addRequired<StackProtector>();    AU.addRequired<MachineOptimizationRemarkEmitterPass>();    MachineFunctionPass::getAnalysisUsage(AU);  } @@ -170,36 +167,36 @@ using StackObjSet = SmallSetVector<int, 8>;  /// runOnMachineFunction - Insert prolog/epilog code and replace abstract  /// frame indexes with appropriate references. -bool PEI::runOnMachineFunction(MachineFunction &Fn) { -  const Function &F = Fn.getFunction(); -  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); -  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); +bool PEI::runOnMachineFunction(MachineFunction &MF) { +  const Function &F = MF.getFunction(); +  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); +  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); -  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr; -  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); +  RS = TRI->requiresRegisterScavenging(MF) ? 
new RegScavenger() : nullptr; +  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);    FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) || -    TRI->requiresFrameIndexReplacementScavenging(Fn); +    TRI->requiresFrameIndexReplacementScavenging(MF);    ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();    // Calculate the MaxCallFrameSize and AdjustsStack variables for the    // function's frame information. Also eliminates call frame pseudo    // instructions. -  calculateCallFrameInfo(Fn); +  calculateCallFrameInfo(MF);    // Determine placement of CSR spill/restore code and prolog/epilog code:    // place all spills in the entry block, all restores in return blocks. -  calculateSaveRestoreBlocks(Fn); +  calculateSaveRestoreBlocks(MF);    // Handle CSR spilling and restoring, for targets that need it. -  if (Fn.getTarget().usesPhysRegsForPEI()) -    spillCalleeSavedRegs(Fn); +  if (MF.getTarget().usesPhysRegsForPEI()) +    spillCalleeSavedRegs(MF);    // Allow the target machine to make final modifications to the function    // before the frame layout is finalized. -  TFI->processFunctionBeforeFrameFinalized(Fn, RS); +  TFI->processFunctionBeforeFrameFinalized(MF, RS);    // Calculate actual frame offsets for all abstract stack objects... -  calculateFrameObjectOffsets(Fn); +  calculateFrameObjectOffsets(MF);    // Add prolog and epilog code to the function.  This function is required    // to align the stack frame as necessary for any stack variables or @@ -207,26 +204,32 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {    // must be called before this function in order to set the AdjustsStack    // and MaxCallFrameSize variables.    if (!F.hasFnAttribute(Attribute::Naked)) -    insertPrologEpilogCode(Fn); +    insertPrologEpilogCode(MF);    // Replace all MO_FrameIndex operands with physical register references    // and actual offsets.    // -  replaceFrameIndices(Fn); +  replaceFrameIndices(MF);    // If register scavenging is needed, as we've enabled doing it as a    // post-pass, scavenge the virtual registers that frame index elimination    // inserted. -  if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) -    scavengeFrameVirtualRegs(Fn, *RS); +  if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging) +    scavengeFrameVirtualRegs(MF, *RS);    // Warn on stack size when it exceeds the given limit. -  MachineFrameInfo &MFI = Fn.getFrameInfo(); +  MachineFrameInfo &MFI = MF.getFrameInfo();    uint64_t StackSize = MFI.getStackSize();    if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {      DiagnosticInfoStackSize DiagStackSize(F, StackSize);      F.getContext().diagnose(DiagStackSize);    } +  ORE->emit([&]() { +    return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize", +                                             MF.getFunction().getSubprogram(), +                                             &MF.front()) +           << ore::NV("NumStackBytes", StackSize) << " stack bytes in function"; +  });    delete RS;    SaveBlocks.clear(); @@ -239,10 +242,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {  /// Calculate the MaxCallFrameSize and AdjustsStack  /// variables for the function's frame information and eliminate call frame  /// pseudo instructions.
-void PEI::calculateCallFrameInfo(MachineFunction &Fn) { -  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); -  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); -  MachineFrameInfo &MFI = Fn.getFrameInfo(); +void PEI::calculateCallFrameInfo(MachineFunction &MF) { +  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); +  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); +  MachineFrameInfo &MFI = MF.getFrameInfo();    unsigned MaxCallFrameSize = 0;    bool AdjustsStack = MFI.adjustsStack(); @@ -257,7 +260,7 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {      return;    std::vector<MachineBasicBlock::iterator> FrameSDOps; -  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) +  for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)      for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)        if (TII.isFrameInstr(*I)) {          unsigned Size = TII.getFrameSize(*I); @@ -285,15 +288,15 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {      // the target doesn't indicate otherwise, remove the call frame pseudos      // here. The sub/add sp instruction pairs are still inserted, but we don't      // need to track the SP adjustment for frame index elimination. -    if (TFI->canSimplifyCallFramePseudos(Fn)) -      TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); +    if (TFI->canSimplifyCallFramePseudos(MF)) +      TFI->eliminateCallFramePseudoInstr(MF, *I->getParent(), I);    }  }  /// Compute the sets of entry and return blocks for saving and restoring  /// callee-saved registers, and placing prolog and epilog code. -void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) { -  const MachineFrameInfo &MFI = Fn.getFrameInfo(); +void PEI::calculateSaveRestoreBlocks(MachineFunction &MF) { +  const MachineFrameInfo &MFI = MF.getFrameInfo();    // Even when we do not change any CSR, we still want to insert the    // prologue and epilogue of the function. @@ -313,8 +316,8 @@ void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) {    }    // Save refs to entry and return blocks. -  SaveBlocks.push_back(&Fn.front()); -  for (MachineBasicBlock &MBB : Fn) { +  SaveBlocks.push_back(&MF.front()); +  for (MachineBasicBlock &MBB : MF) {      if (MBB.isEHFuncletEntry())        SaveBlocks.push_back(&MBB);      if (MBB.isReturnBlock()) @@ -457,10 +460,10 @@ static void updateLiveness(MachineFunction &MF) {  /// Insert save code for the callee-saved registers used in the function.  static void insertCSRSaves(MachineBasicBlock &SaveBlock,                             ArrayRef<CalleeSavedInfo> CSI) { -  MachineFunction &Fn = *SaveBlock.getParent(); -  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); -  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); -  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); +  MachineFunction &MF = *SaveBlock.getParent(); +  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); +  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); +  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();    MachineBasicBlock::iterator I = SaveBlock.begin();    if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { @@ -477,10 +480,10 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,  /// Insert restore code for the callee-saved registers used in the function.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,                                std::vector<CalleeSavedInfo> &CSI) { -  MachineFunction &Fn = *RestoreBlock.getParent(); -  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); -  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); -  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); +  MachineFunction &MF = *RestoreBlock.getParent(); +  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); +  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); +  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();    // Restore all registers immediately before the return and any    // terminators that precede it. @@ -499,27 +502,27 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,    }  } -void PEI::spillCalleeSavedRegs(MachineFunction &Fn) { +void PEI::spillCalleeSavedRegs(MachineFunction &MF) {    // We can't list this requirement in getRequiredProperties because some    // targets (WebAssembly) use virtual registers past this point, and the pass    // pipeline is set up without giving the passes a chance to look at the    // TargetMachine.    // FIXME: Find a way to express this in getRequiredProperties. -  assert(Fn.getProperties().hasProperty( +  assert(MF.getProperties().hasProperty(        MachineFunctionProperties::Property::NoVRegs)); -  const Function &F = Fn.getFunction(); -  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); -  MachineFrameInfo &MFI = Fn.getFrameInfo(); +  const Function &F = MF.getFunction(); +  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); +  MachineFrameInfo &MFI = MF.getFrameInfo();    MinCSFrameIndex = std::numeric_limits<unsigned>::max();    MaxCSFrameIndex = 0;    // Determine which of the registers in the callee save list should be saved.    BitVector SavedRegs; -  TFI->determineCalleeSaves(Fn, SavedRegs, RS); +  TFI->determineCalleeSaves(MF, SavedRegs, RS);    // Assign stack slots for any callee-saved registers that must be spilled. -  assignCalleeSavedSpillSlots(Fn, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex); +  assignCalleeSavedSpillSlots(MF, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex);    // Add the code to save and restore the callee saved registers.    if (!F.hasFnAttribute(Attribute::Naked)) { @@ -531,7 +534,7 @@ void PEI::spillCalleeSavedRegs(MachineFunction &Fn) {          insertCSRSaves(*SaveBlock, CSI);          // Update the live-in information of all the blocks up to the save          // point. 
-        updateLiveness(Fn); +        updateLiveness(MF);        }        for (MachineBasicBlock *RestoreBlock : RestoreBlocks)          insertCSRRestores(*RestoreBlock, CSI); @@ -558,10 +561,12 @@ AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,    Offset = alignTo(Offset, Align, Skew);    if (StackGrowsDown) { -    DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); +    LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset +                      << "]\n");      MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset    } else { -    DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n"); +    LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset +                      << "]\n");      MFI.setObjectOffset(FrameIdx, Offset);      Offset += MFI.getObjectSize(FrameIdx);    } @@ -654,12 +659,12 @@ static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,    if (StackGrowsDown) {      int ObjStart = -(FreeStart + ObjSize); -    DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << ObjStart -                 << "]\n"); +    LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" +                      << ObjStart << "]\n");      MFI.setObjectOffset(FrameIdx, ObjStart);    } else { -    DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << FreeStart -                 << "]\n"); +    LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" +                      << FreeStart << "]\n");      MFI.setObjectOffset(FrameIdx, FreeStart);    } @@ -685,15 +690,14 @@ AssignProtectedObjSet(const StackObjSet &UnassignedObjs,  /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the  /// abstract stack objects. -void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { -  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); -  StackProtector *SP = &getAnalysis<StackProtector>(); +void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { +  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();    bool StackGrowsDown =      TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;    // Loop over all of the stack objects, assigning sequential addresses... -  MachineFrameInfo &MFI = Fn.getFrameInfo(); +  MachineFrameInfo &MFI = MF.getFrameInfo();    // Start at the beginning of the local area.    // The Offset is the distance from the stack top in the direction @@ -706,7 +710,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {    int64_t Offset = LocalAreaOffset;    // Skew to be applied to alignment. -  unsigned Skew = TFI.getStackAlignmentSkew(Fn); +  unsigned Skew = TFI.getStackAlignmentSkew(MF);    // If there are fixed sized objects that are preallocated in the local area,    // non-fixed objects can't be allocated right at the start of local area. 
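The AdjustStackOffset hunks above only touch the debug output, but the offset arithmetic they print is worth spelling out. Below is a toy model of the bookkeeping for a downward-growing stack, assuming the llvm::alignTo(Value, Align, Skew) semantics of rounding up to a multiple of Align shifted by Skew; this is a hypothetical simplified sketch, not the LLVM implementation:

#include <cstdint>
#include <cstdio>

// Round Value up to the next multiple of Align, offset by Skew, mirroring
// llvm::alignTo(Value, Align, Skew) from llvm/Support/MathExtras.h.
static uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew) {
  Skew %= Align;
  return (Value + Align - 1 - Skew) / Align * Align + Skew;
}

int main() {
  struct Object { uint64_t Size, Align; };
  const Object Objects[] = {{4, 4}, {1, 1}, {16, 16}}; // hypothetical frame
  uint64_t Offset = 0, Skew = 0;
  for (unsigned FI = 0; FI != 3; ++FI) {
    // Stack grows down: reserve the object's bytes, then align the running
    // offset; the object then lives at SP[-Offset].
    Offset += Objects[FI].Size;
    Offset = alignTo(Offset, Objects[FI].Align, Skew);
    std::printf("alloc FI(%u) at SP[-%llu]\n", FI, (unsigned long long)Offset);
  }
  std::printf("local frame size: %llu bytes\n", (unsigned long long)Offset);
  return 0;
}

Running this prints SP[-4], SP[-5], SP[-32]: the 16-byte-aligned object forces the running offset from 21 up to 32, which is exactly the kind of padding the "alloc FI(n) at SP[...]" debug lines make visible.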
@@ -739,7 +743,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {        // Adjust to alignment boundary        Offset = alignTo(Offset, Align, Skew); -      DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n"); +      LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n");        MFI.setObjectOffset(i, -Offset);        // Set the computed offset      }    } else if (MaxCSFrameIndex >= MinCSFrameIndex) { @@ -752,7 +756,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {        // Adjust to alignment boundary        Offset = alignTo(Offset, Align, Skew); -      DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n"); +      LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n");        MFI.setObjectOffset(i, Offset);        Offset += MFI.getObjectSize(i);      } @@ -766,11 +770,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {    // Make sure the special register scavenging spill slot is closest to the    // incoming stack pointer if a frame pointer is required and is closer    // to the incoming rather than the final stack pointer. -  const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo(); -  bool EarlyScavengingSlots = (TFI.hasFP(Fn) && +  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); +  bool EarlyScavengingSlots = (TFI.hasFP(MF) &&                                 TFI.isFPCloseToIncomingSP() && -                               RegInfo->useFPForScavengingIndex(Fn) && -                               !RegInfo->needsStackRealignment(Fn)); +                               RegInfo->useFPForScavengingIndex(MF) && +                               !RegInfo->needsStackRealignment(MF));    if (RS && EarlyScavengingSlots) {      SmallVector<int, 2> SFIs;      RS->getScavengingFrameIndices(SFIs); @@ -789,14 +793,14 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {      // Adjust to alignment boundary.      Offset = alignTo(Offset, Align, Skew); -    DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); +    LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");      // Resolve offsets for objects in the local block.      for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) {        std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i);        int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; -      DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << -            FIOffset << "]\n"); +      LLVM_DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset +                        << "]\n");        MFI.setObjectOffset(Entry.first, FIOffset);      }      // Allocate the local block @@ -807,7 +811,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {    // Retrieve the Exception Handler registration node.    
int EHRegNodeFrameIndex = std::numeric_limits<int>::max(); -  if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo()) +  if (const WinEHFuncInfo *FuncInfo = MF.getWinEHFuncInfo())      EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex;    // Make sure that the stack protector comes before the local variables on the @@ -836,16 +840,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {            EHRegNodeFrameIndex == (int)i)          continue; -      switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) { -      case StackProtector::SSPLK_None: +      switch (MFI.getObjectSSPLayout(i)) { +      case MachineFrameInfo::SSPLK_None:          continue; -      case StackProtector::SSPLK_SmallArray: +      case MachineFrameInfo::SSPLK_SmallArray:          SmallArrayObjs.insert(i);          continue; -      case StackProtector::SSPLK_AddrOf: +      case MachineFrameInfo::SSPLK_AddrOf:          AddrOfObjs.insert(i);          continue; -      case StackProtector::SSPLK_LargeArray: +      case MachineFrameInfo::SSPLK_LargeArray:          LargeArrayObjs.insert(i);          continue;        } @@ -889,9 +893,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {                        MaxAlign, Skew);    // Give the targets a chance to order the objects the way they like it. -  if (Fn.getTarget().getOptLevel() != CodeGenOpt::None && -      Fn.getTarget().Options.StackSymbolOrdering) -    TFI.orderFrameObjects(Fn, ObjectsToAllocate); +  if (MF.getTarget().getOptLevel() != CodeGenOpt::None && +      MF.getTarget().Options.StackSymbolOrdering) +    TFI.orderFrameObjects(MF, ObjectsToAllocate);    // Keep track of which bytes in the fixed and callee-save range are used so we    // can use the holes when allocating later stack objects.  Only do this if @@ -899,8 +903,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {    // optimizing.    BitVector StackBytesFree;    if (!ObjectsToAllocate.empty() && -      Fn.getTarget().getOptLevel() != CodeGenOpt::None && -      MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn)) +      MF.getTarget().getOptLevel() != CodeGenOpt::None && +      MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(MF))      computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex,                            FixedCSEnd, StackBytesFree); @@ -924,7 +928,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {      // If we have reserved argument space for call sites in the function      // immediately on entry to the current function, count it as part of the      // overall stack size. -    if (MFI.adjustsStack() && TFI.hasReservedCallFrame(Fn)) +    if (MFI.adjustsStack() && TFI.hasReservedCallFrame(MF))        Offset += MFI.getMaxCallFrameSize();      // Round up the size to a multiple of the alignment.  If the function has @@ -934,7 +938,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {      // value.      
unsigned StackAlign;      if (MFI.adjustsStack() || MFI.hasVarSizedObjects() || -        (RegInfo->needsStackRealignment(Fn) && MFI.getObjectIndexEnd() != 0)) +        (RegInfo->needsStackRealignment(MF) && MFI.getObjectIndexEnd() != 0))        StackAlign = TFI.getStackAlignment();      else        StackAlign = TFI.getTransientStackAlignment(); @@ -949,68 +953,61 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {    int64_t StackSize = Offset - LocalAreaOffset;    MFI.setStackSize(StackSize);    NumBytesStackSpace += StackSize; - -  ORE->emit([&]() { -    return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize", -                                             Fn.getFunction().getSubprogram(), -                                             &Fn.front()) -           << ore::NV("NumStackBytes", StackSize) << " stack bytes in function"; -  });  }  /// insertPrologEpilogCode - Scan the function for modified callee saved  /// registers, insert spill code for these callee saved registers, then add  /// prolog and epilog code to the function. -void PEI::insertPrologEpilogCode(MachineFunction &Fn) { -  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); +void PEI::insertPrologEpilogCode(MachineFunction &MF) { +  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();    // Add prologue to the function...    for (MachineBasicBlock *SaveBlock : SaveBlocks) -    TFI.emitPrologue(Fn, *SaveBlock); +    TFI.emitPrologue(MF, *SaveBlock);    // Add epilogue to restore the callee-save registers in each exiting block.    for (MachineBasicBlock *RestoreBlock : RestoreBlocks) -    TFI.emitEpilogue(Fn, *RestoreBlock); +    TFI.emitEpilogue(MF, *RestoreBlock);    for (MachineBasicBlock *SaveBlock : SaveBlocks) -    TFI.inlineStackProbe(Fn, *SaveBlock); +    TFI.inlineStackProbe(MF, *SaveBlock);    // Emit additional code that is required to support segmented stacks, if    // we've been asked for it.  This, when linked with a runtime with support    // for segmented stacks (libgcc is one), will result in allocating stack    // space in small chunks instead of one large contiguous block. -  if (Fn.shouldSplitStack()) { +  if (MF.shouldSplitStack()) {      for (MachineBasicBlock *SaveBlock : SaveBlocks) -      TFI.adjustForSegmentedStacks(Fn, *SaveBlock); +      TFI.adjustForSegmentedStacks(MF, *SaveBlock);      // Record that there are split-stack functions, so we will emit a      // special section to tell the linker. -    Fn.getMMI().setHasSplitStack(true); +    MF.getMMI().setHasSplitStack(true);    } else -    Fn.getMMI().setHasNosplitStack(true); +    MF.getMMI().setHasNosplitStack(true);    // Emit additional code that is required to explicitly handle the stack in    // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The    // approach is rather similar to that of Segmented Stacks, but it uses a    // different conditional check and another BIF for allocating more stack    // space. -  if (Fn.getFunction().getCallingConv() == CallingConv::HiPE) +  if (MF.getFunction().getCallingConv() == CallingConv::HiPE)      for (MachineBasicBlock *SaveBlock : SaveBlocks) -      TFI.adjustForHiPEPrologue(Fn, *SaveBlock); +      TFI.adjustForHiPEPrologue(MF, *SaveBlock);  }  /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical  /// register references and actual offsets. 
-void PEI::replaceFrameIndices(MachineFunction &Fn) { -  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); -  if (!TFI.needsFrameIndexResolution(Fn)) return; +void PEI::replaceFrameIndices(MachineFunction &MF) { +  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); +  if (!TFI.needsFrameIndexResolution(MF)) return;    // Store SPAdj at exit of a basic block.    SmallVector<int, 8> SPState; -  SPState.resize(Fn.getNumBlockIDs()); +  SPState.resize(MF.getNumBlockIDs());    df_iterator_default_set<MachineBasicBlock*> Reachable;    // Iterate over the reachable blocks in DFS order. -  for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); +  for (auto DFI = df_ext_begin(&MF, Reachable), DFE = df_ext_end(&MF, Reachable);         DFI != DFE; ++DFI) {      int SPAdj = 0;      // Check the exit state of the DFS stack predecessor. @@ -1021,27 +1018,27 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {        SPAdj = SPState[StackPred->getNumber()];      }      MachineBasicBlock *BB = *DFI; -    replaceFrameIndices(BB, Fn, SPAdj); +    replaceFrameIndices(BB, MF, SPAdj);      SPState[BB->getNumber()] = SPAdj;    }    // Handle the unreachable blocks. -  for (auto &BB : Fn) { +  for (auto &BB : MF) {      if (Reachable.count(&BB))        // Already handled in DFS traversal.        continue;      int SPAdj = 0; -    replaceFrameIndices(&BB, Fn, SPAdj); +    replaceFrameIndices(&BB, MF, SPAdj);    }  } -void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, +void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,                                int &SPAdj) { -  assert(Fn.getSubtarget().getRegisterInfo() && +  assert(MF.getSubtarget().getRegisterInfo() &&           "getRegisterInfo() must be implemented!"); -  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); -  const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); -  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); +  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); +  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); +  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();    if (RS && FrameIndexEliminationScavenging)      RS->enterBasicBlock(*BB); @@ -1052,7 +1049,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,      if (TII.isFrameInstr(*I)) {        InsideCallSequence = TII.isFrameSetup(*I);        SPAdj += TII.getSPAdjust(*I); -      I = TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); +      I = TFI->eliminateCallFramePseudoInstr(MF, *BB, I);        continue;      } @@ -1071,8 +1068,9 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,                           "operand of a DBG_VALUE machine instruction");          unsigned Reg;          int64_t Offset = -            TFI->getFrameIndexReference(Fn, MI.getOperand(0).getIndex(), Reg); +            TFI->getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);          MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/); +        MI.getOperand(0).setIsDebug();          auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),                                               DIExpression::NoDeref, Offset);          MI.getOperand(3).setMetadata(DIExpr); @@ -1091,7 +1089,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,          unsigned Reg;          MachineOperand &Offset = MI.getOperand(i + 1);      
    int refOffset = TFI->getFrameIndexReferencePreferSP( -            Fn, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false); +            MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);          Offset.setImm(Offset.getImm() + refOffset);          MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/);          continue; diff --git a/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp new file mode 100644 index 000000000000..050fef5d25ed --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -0,0 +1,195 @@ +//===---- ReachingDefAnalysis.cpp - Reaching Def Analysis ---*- C++ -*-----===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ReachingDefAnalysis.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "reaching-deps-analysis" + +char ReachingDefAnalysis::ID = 0; +INITIALIZE_PASS(ReachingDefAnalysis, DEBUG_TYPE, "ReachingDefAnalysis", false, +                true) + +void ReachingDefAnalysis::enterBasicBlock( +    const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + +  MachineBasicBlock *MBB = TraversedMBB.MBB; +  unsigned MBBNumber = MBB->getNumber(); +  assert(MBBNumber < MBBReachingDefs.size() && +         "Unexpected basic block number."); +  MBBReachingDefs[MBBNumber].resize(NumRegUnits); + +  // Reset instruction counter in each basic block. +  CurInstr = 0; + +  // Set up LiveRegs to represent registers entering MBB. +  // Default values are 'nothing happened a long time ago'. +  if (LiveRegs.empty()) +    LiveRegs.assign(NumRegUnits, ReachingDefDefaultVal); + +  // This is the entry block. +  if (MBB->pred_empty()) { +    for (const auto &LI : MBB->liveins()) { +      for (MCRegUnitIterator Unit(LI.PhysReg, TRI); Unit.isValid(); ++Unit) { +        // Treat function live-ins as if they were defined just before the first +        // instruction.  Usually, function arguments are set up immediately +        // before the call. +        LiveRegs[*Unit] = -1; +        MBBReachingDefs[MBBNumber][*Unit].push_back(LiveRegs[*Unit]); +      } +    } +    LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n"); +    return; +  } + +  // Try to coalesce live-out registers from predecessors. +  for (MachineBasicBlock *pred : MBB->predecessors()) { +    assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && +           "Should have pre-allocated MBBInfos for all MBBs"); +    const LiveRegsDefInfo &Incoming = MBBOutRegsInfos[pred->getNumber()]; +    // Incoming is null if this is a backedge from a BB +    // we haven't processed yet +    if (Incoming.empty()) +      continue; + +    for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) { +      // Use the most recent predecessor def for each register. +      LiveRegs[Unit] = std::max(LiveRegs[Unit], Incoming[Unit]); +      if ((LiveRegs[Unit] != ReachingDefDefaultVal)) +        MBBReachingDefs[MBBNumber][Unit].push_back(LiveRegs[Unit]); +    } +  } + +  LLVM_DEBUG(dbgs() << printMBBReference(*MBB) +                    << (!TraversedMBB.IsDone ? 
": incomplete\n" +                                             : ": all preds known\n")); +} + +void ReachingDefAnalysis::leaveBasicBlock( +    const LoopTraversal::TraversedMBBInfo &TraversedMBB) { +  assert(!LiveRegs.empty() && "Must enter basic block first."); +  unsigned MBBNumber = TraversedMBB.MBB->getNumber(); +  assert(MBBNumber < MBBOutRegsInfos.size() && +         "Unexpected basic block number."); +  // Save register clearances at end of MBB - used by enterBasicBlock(). +  MBBOutRegsInfos[MBBNumber] = LiveRegs; + +  // While processing the basic block, we kept `Def` relative to the start +  // of the basic block for convenience. However, future use of this information +  // only cares about the clearance from the end of the block, so adjust +  // everything to be relative to the end of the basic block. +  for (int &OutLiveReg : MBBOutRegsInfos[MBBNumber]) +    OutLiveReg -= CurInstr; +  LiveRegs.clear(); +} + +void ReachingDefAnalysis::processDefs(MachineInstr *MI) { +  assert(!MI->isDebugInstr() && "Won't process debug instructions"); + +  unsigned MBBNumber = MI->getParent()->getNumber(); +  assert(MBBNumber < MBBReachingDefs.size() && +         "Unexpected basic block number."); +  const MCInstrDesc &MCID = MI->getDesc(); +  for (unsigned i = 0, +                e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); +       i != e; ++i) { +    MachineOperand &MO = MI->getOperand(i); +    if (!MO.isReg() || !MO.getReg()) +      continue; +    if (MO.isUse()) +      continue; +    for (MCRegUnitIterator Unit(MO.getReg(), TRI); Unit.isValid(); ++Unit) { +      // This instruction explicitly defines the current reg unit. +      LLVM_DEBUG(dbgs() << printReg(MO.getReg(), TRI) << ":\t" << CurInstr +                        << '\t' << *MI); + +      // How many instructions since this reg unit was last written? +      LiveRegs[*Unit] = CurInstr; +      MBBReachingDefs[MBBNumber][*Unit].push_back(CurInstr); +    } +  } +  InstIds[MI] = CurInstr; +  ++CurInstr; +} + +void ReachingDefAnalysis::processBasicBlock( +    const LoopTraversal::TraversedMBBInfo &TraversedMBB) { +  enterBasicBlock(TraversedMBB); +  for (MachineInstr &MI : *TraversedMBB.MBB) { +    if (!MI.isDebugInstr()) +      processDefs(&MI); +  } +  leaveBasicBlock(TraversedMBB); +} + +bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) { +  if (skipFunction(mf.getFunction())) +    return false; +  MF = &mf; +  TRI = MF->getSubtarget().getRegisterInfo(); + +  LiveRegs.clear(); +  NumRegUnits = TRI->getNumRegUnits(); + +  MBBReachingDefs.resize(mf.getNumBlockIDs()); + +  LLVM_DEBUG(dbgs() << "********** REACHING DEFINITION ANALYSIS **********\n"); + +  // Initialize the MBBOutRegsInfos +  MBBOutRegsInfos.resize(mf.getNumBlockIDs()); + +  // Traverse the basic blocks. +  LoopTraversal Traversal; +  LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf); +  for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) { +    processBasicBlock(TraversedMBB); +  } + +  // Sorting all reaching defs found for a ceartin reg unit in a given BB. +  for (MBBDefsInfo &MBBDefs : MBBReachingDefs) { +    for (MBBRegUnitDefs &RegUnitDefs : MBBDefs) +      llvm::sort(RegUnitDefs.begin(), RegUnitDefs.end()); +  } + +  return false; +} + +void ReachingDefAnalysis::releaseMemory() { +  // Clear the internal vectors. 
+  MBBOutRegsInfos.clear(); +  MBBReachingDefs.clear(); +  InstIds.clear(); +} + +int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) { +  assert(InstIds.count(MI) && "Unexpected machine instruction."); +  int InstId = InstIds[MI]; +  int DefRes = ReachingDefDefaultVal; +  unsigned MBBNumber = MI->getParent()->getNumber(); +  assert(MBBNumber < MBBReachingDefs.size() && +         "Unexpected basic block number."); +  int LatestDef = ReachingDefDefaultVal; +  for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { +    for (int Def : MBBReachingDefs[MBBNumber][*Unit]) { +      if (Def >= InstId) +        break; +      DefRes = Def; +    } +    LatestDef = std::max(LatestDef, DefRes); +  } +  return LatestDef; +} + +int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) { +  assert(InstIds.count(MI) && "Unexpected machine instruction."); +  return InstIds[MI] - getReachingDef(MI, PhysReg); +} diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp index 74c1592634aa..bc28a054c680 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp @@ -91,7 +91,7 @@ void RegAllocBase::allocatePhysRegs() {      // Unused registers can appear when the spiller coalesces snippets.      if (MRI->reg_nodbg_empty(VirtReg->reg)) { -      DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); +      LLVM_DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');        aboutToRemoveInterval(*VirtReg);        LIS->removeInterval(VirtReg->reg);        continue; @@ -103,9 +103,9 @@ void RegAllocBase::allocatePhysRegs() {      // selectOrSplit requests the allocator to return an available physical      // register if possible and populate a list of new live intervals that      // result from splitting.
-    DEBUG(dbgs() << "\nselectOrSplit " -          << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg)) -          << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); +    LLVM_DEBUG(dbgs() << "\nselectOrSplit " +                      << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg)) +                      << ':' << *VirtReg << " w=" << VirtReg->weight << '\n');      using VirtRegVec = SmallVector<unsigned, 4>; @@ -145,12 +145,12 @@ void RegAllocBase::allocatePhysRegs() {        assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");        if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {          assert(SplitVirtReg->empty() && "Non-empty but used interval"); -        DEBUG(dbgs() << "not queueing unused  " << *SplitVirtReg << '\n'); +        LLVM_DEBUG(dbgs() << "not queueing unused  " << *SplitVirtReg << '\n');          aboutToRemoveInterval(*SplitVirtReg);          LIS->removeInterval(SplitVirtReg->reg);          continue;        } -      DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); +      LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");        assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&               "expect split value in virtual register");        enqueue(SplitVirtReg); diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 1125d2c62bef..daeff3fc3963 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -219,8 +219,8 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,        Intfs.push_back(Intf);      }    } -  DEBUG(dbgs() << "spilling " << printReg(PhysReg, TRI) -               << " interferences with " << VirtReg << "\n"); +  LLVM_DEBUG(dbgs() << "spilling " << printReg(PhysReg, TRI) +                    << " interferences with " << VirtReg << "\n");    assert(!Intfs.empty() && "expected interference");    // Spill each interfering vreg allocated to PhysReg or an alias. @@ -292,7 +292,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,    }    // No other spill candidates were found, so spill the current VirtReg. 
-  DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); +  LLVM_DEBUG(dbgs() << "spilling: " << VirtReg << '\n');    if (!VirtReg.isSpillable())      return ~0u;    LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats); @@ -304,9 +304,8 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,  }  bool RABasic::runOnMachineFunction(MachineFunction &mf) { -  DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n" -               << "********** Function: " -               << mf.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n" +                    << "********** Function: " << mf.getName() << '\n');    MF = &mf;    RegAllocBase::init(getAnalysis<VirtRegMap>(), @@ -323,7 +322,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {    postOptimization();    // Diagnostic output before rewriting -  DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); +  LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");    releaseMemory();    return true; diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index 17d9492d942e..7b57c6cbcdb8 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -179,7 +179,7 @@ namespace {      }    private: -    bool runOnMachineFunction(MachineFunction &Fn) override; +    bool runOnMachineFunction(MachineFunction &MF) override;      void allocateBasicBlock(MachineBasicBlock &MBB);      void handleThroughOperands(MachineInstr &MI,                                 SmallVectorImpl<unsigned> &VirtDead); @@ -206,7 +206,7 @@ namespace {        return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));      } -    LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, MCPhysReg PhysReg); +    LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, MCPhysReg PhysReg);      LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator,                                        unsigned Hint);      LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum, @@ -322,11 +322,11 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,      // instruction, not on the spill.      
bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;      LR.Dirty = false; -    DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI) -                 << " in " << printReg(LR.PhysReg, TRI)); +    LLVM_DEBUG(dbgs() << "Spilling " << printReg(LRI->VirtReg, TRI) << " in " +                      << printReg(LR.PhysReg, TRI));      const TargetRegisterClass &RC = *MRI->getRegClass(LRI->VirtReg);      int FI = getStackSpaceFor(LRI->VirtReg, RC); -    DEBUG(dbgs() << " to stack slot #" << FI << "\n"); +    LLVM_DEBUG(dbgs() << " to stack slot #" << FI << "\n");      TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, &RC, TRI);      ++NumStores;   // Update statistics @@ -339,7 +339,9 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,        MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI);        assert(NewDV->getParent() == MBB && "dangling parent pointer");        (void)NewDV; -      DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); +      LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:" +                        << "\n" +                        << *NewDV);      }      // Now that this register is spilled, there should not be any DBG_VALUE      // pointing to this register because they are all pointing to the spilled value @@ -470,13 +472,14 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,    }  } -/// \brief Return the cost of spilling to clear out PhysReg and aliases so it is +/// Return the cost of spilling to clear out PhysReg and aliases so it is  /// free for allocation. Returns 0 when PhysReg is free or disabled with all  /// aliases disabled - it can be allocated directly.  /// \returns spillImpossible when PhysReg or an alias can't be spilled.  unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {    if (isRegUsedInInstr(PhysReg)) { -    DEBUG(dbgs() << printReg(PhysReg, TRI) << " is already used in instr.\n"); +    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) +                      << " is already used in instr.\n");      return spillImpossible;    }    switch (unsigned VirtReg = PhysRegState[PhysReg]) { @@ -485,8 +488,8 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {    case regFree:      return 0;    case regReserved: -    DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " -                 << printReg(PhysReg, TRI) << " is reserved already.\n"); +    LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " +                      << printReg(PhysReg, TRI) << " is reserved already.\n");      return spillImpossible;    default: {      LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); @@ -496,7 +499,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {    }    // This is a disabled register, add up cost of aliases. -  DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n"); +  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n");    unsigned Cost = 0;    for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {      MCPhysReg Alias = *AI; @@ -519,12 +522,12 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {    return Cost;  } -/// \brief This method updates local state so that we know that PhysReg is the +/// This method updates local state so that we know that PhysReg is the  /// proper container for VirtReg now.  The physical register must not be used  /// for anything else when this is called.
void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { -  DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to " -               << printReg(PhysReg, TRI) << "\n"); +  LLVM_DEBUG(dbgs() << "Assigning " << printReg(LR.VirtReg, TRI) << " to " +                    << printReg(PhysReg, TRI) << "\n");    PhysRegState[PhysReg] = LR.VirtReg;    assert(!LR.PhysReg && "Already assigned a physreg");    LR.PhysReg = PhysReg; @@ -570,16 +573,16 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::allocVirtReg(MachineInstr &MI,      }    } -  DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from " -               << TRI->getRegClassName(&RC) << "\n"); +  LLVM_DEBUG(dbgs() << "Allocating " << printReg(VirtReg) << " from " +                    << TRI->getRegClassName(&RC) << "\n");    unsigned BestReg = 0;    unsigned BestCost = spillImpossible;    for (MCPhysReg PhysReg : AO) {      unsigned Cost = calcSpillCost(PhysReg); -    DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n"); -    DEBUG(dbgs() << "\tCost: " << Cost << "\n"); -    DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); +    LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << "\n"); +    LLVM_DEBUG(dbgs() << "\tCost: " << Cost << "\n"); +    LLVM_DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");      // Cost is 0 when all aliases are already disabled.      if (Cost == 0) {        assignVirtToPhysReg(*LRI, PhysReg); @@ -654,22 +657,22 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,      LRI = allocVirtReg(MI, LRI, Hint);      const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);      int FrameIndex = getStackSpaceFor(VirtReg, RC); -    DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " -                 << printReg(LRI->PhysReg, TRI) << "\n"); +    LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " +                      << printReg(LRI->PhysReg, TRI) << "\n");      TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, &RC, TRI);      ++NumLoads;    } else if (LRI->Dirty) {      if (isLastUseOfLocalReg(MO)) { -      DEBUG(dbgs() << "Killing last use: " << MO << "\n"); +      LLVM_DEBUG(dbgs() << "Killing last use: " << MO << "\n");        if (MO.isUse())          MO.setIsKill();        else          MO.setIsDead();      } else if (MO.isKill()) { -      DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); +      LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");        MO.setIsKill(false);      } else if (MO.isDead()) { -      DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n"); +      LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");        MO.setIsDead(false);      }    } else if (MO.isKill()) { @@ -677,10 +680,10 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI,      // register would be killed immediately, and there might be a second use:      //   %foo = OR killed %x, %x      // This would cause a second reload of %x into a different register. 
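The allocVirtReg loop in this hunk is a straightforward best-cost scan: take the first register whose spill cost is zero, otherwise remember the cheapest spillable one. A compact sketch of that selection, with an invented cost table standing in for calcSpillCost and spillImpossible as the sentinel the diff uses:

    #include <iostream>
    #include <vector>

    constexpr unsigned spillImpossible = ~0u;

    // Invented cost oracle playing the role of RegAllocFast::calcSpillCost().
    unsigned calcSpillCost(unsigned PhysReg) {
      static const unsigned Costs[] = {spillImpossible, 3, 0, 1};
      return Costs[PhysReg % 4];
    }

    int main() {
      const std::vector<unsigned> AllocationOrder = {0, 1, 2, 3};
      unsigned BestReg = 0, BestCost = spillImpossible;
      for (unsigned PhysReg : AllocationOrder) {
        unsigned Cost = calcSpillCost(PhysReg);
        // Cost 0 means the register and all aliases are free: take it at once.
        if (Cost == 0) { BestReg = PhysReg; break; }
        // Otherwise keep the cheapest register whose current contents we
        // could spill out of the way.
        if (Cost < BestCost) { BestReg = PhysReg; BestCost = Cost; }
      }
      std::cout << "chose physreg " << BestReg << '\n';
      return 0;
    }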
-    DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); +    LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");      MO.setIsKill(false);    } else if (MO.isDead()) { -    DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); +    LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");      MO.setIsDead(false);    }    assert(LRI->PhysReg && "Register not assigned"); @@ -699,13 +702,13 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum,    bool Dead = MO.isDead();    if (!MO.getSubReg()) {      MO.setReg(PhysReg); -    MO.setIsRenamableIfNoExtraRegAllocReq(); +    MO.setIsRenamable(true);      return MO.isKill() || Dead;    }    // Handle subregister index.    MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0); -  MO.setIsRenamableIfNoExtraRegAllocReq(); +  MO.setIsRenamable(true);    MO.setSubReg(0);    // A kill flag implies killing the full register. Add corresponding super @@ -727,7 +730,7 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum,  // there are additional physreg defines.  void RegAllocFast::handleThroughOperands(MachineInstr &MI,                                           SmallVectorImpl<unsigned> &VirtDead) { -  DEBUG(dbgs() << "Scanning for through registers:"); +  LLVM_DEBUG(dbgs() << "Scanning for through registers:");    SmallSet<unsigned, 8> ThroughRegs;    for (const MachineOperand &MO : MI.operands()) {      if (!MO.isReg()) continue; @@ -737,13 +740,13 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,      if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) ||          (MO.getSubReg() && MI.readsVirtualRegister(Reg))) {        if (ThroughRegs.insert(Reg).second) -        DEBUG(dbgs() << ' ' << printReg(Reg)); +        LLVM_DEBUG(dbgs() << ' ' << printReg(Reg));      }    }    // If any physreg defines collide with preallocated through registers,    // we must spill and reallocate. -  DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); +  LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n");    for (const MachineOperand &MO : MI.operands()) {      if (!MO.isReg() || !MO.isDef()) continue;      unsigned Reg = MO.getReg(); @@ -756,7 +759,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,    }    SmallVector<unsigned, 8> PartialDefs; -  DEBUG(dbgs() << "Allocating tied uses.\n"); +  LLVM_DEBUG(dbgs() << "Allocating tied uses.\n");    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {      const MachineOperand &MO = MI.getOperand(I);      if (!MO.isReg()) continue; @@ -764,15 +767,16 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,      if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;      if (MO.isUse()) {        if (!MO.isTied()) continue; -      DEBUG(dbgs() << "Operand " << I << "("<< MO << ") is tied to operand " -        << MI.findTiedOperandIdx(I) << ".\n"); +      LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO +                        << ") is tied to operand " << MI.findTiedOperandIdx(I) +                        << ".\n");        LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0);        MCPhysReg PhysReg = LRI->PhysReg;        setPhysReg(MI, I, PhysReg);        // Note: we don't update the def operand yet. That would cause the normal        // def-scan to attempt spilling.      
} else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) { -      DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); +      LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << "\n");        // Reload the register, but don't assign to the operand just yet.        // That would confuse the later phys-def processing pass.        LiveRegMap::iterator LRI = reloadVirtReg(MI, I, Reg, 0); @@ -780,7 +784,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,      }    } -  DEBUG(dbgs() << "Allocating early clobbers.\n"); +  LLVM_DEBUG(dbgs() << "Allocating early clobbers.\n");    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {      const MachineOperand &MO = MI.getOperand(I);      if (!MO.isReg()) continue; @@ -801,8 +805,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,      if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;      unsigned Reg = MO.getReg();      if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; -    DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) -                 << " as used in instr\n"); +    LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) +                      << " as used in instr\n");      markRegUsedInInstr(Reg);    } @@ -848,7 +852,7 @@ void RegAllocFast::dumpState() {  void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {    this->MBB = &MBB; -  DEBUG(dbgs() << "\nAllocating " << MBB); +  LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);    PhysRegState.assign(TRI->getNumRegs(), regDisabled);    assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); @@ -866,10 +870,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {    // Otherwise, sequentially allocate each instruction in the MBB.    for (MachineInstr &MI : MBB) {      const MCInstrDesc &MCID = MI.getDesc(); -    DEBUG( -      dbgs() << "\n>> " << MI << "Regs:"; -      dumpState() -    ); +    LLVM_DEBUG(dbgs() << "\n>> " << MI << "Regs:"; dumpState());      // Debug values are not allowed to change codegen in any way.      if (MI.isDebugValue()) { @@ -894,13 +895,13 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {          if (SS != -1) {            // Modify DBG_VALUE now that the value is in a spill slot.            updateDbgValueForSpill(*DebugMI, SS); -          DEBUG(dbgs() << "Modifying debug info due to spill:" -                       << "\t" << *DebugMI); +          LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" +                            << "\t" << *DebugMI);            continue;          }          // We can't allocate a physreg for a DebugValue, sorry! -        DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); +        LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");          MO.setReg(0);        } @@ -910,6 +911,9 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {        continue;      } +    if (MI.isDebugLabel()) +      continue; +      // If this is a copy, we may be able to coalesce.      unsigned CopySrcReg = 0;      unsigned CopyDstReg = 0; @@ -1025,7 +1029,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {        // as call-clobbered, this is not correct because some of those        // definitions may be used later on and we do not want to reuse        // those for virtual registers in between. 
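The comment above explains why the fast allocator simply spills everything live before a call rather than reasoning about the call's clobber mask. A rough sketch of that "store dirty values, then forget the whole map" step, using invented types in place of the pass's LiveRegMap:

    #include <iostream>
    #include <map>

    struct LiveReg { unsigned PhysReg; bool Dirty; };

    // Toy spillAll: store modified values back to their stack slots, then
    // drop every mapping so no physreg is assumed live across the call.
    void spillAll(std::map<unsigned, LiveReg> &LiveVirtRegs) {
      for (const auto &Entry : LiveVirtRegs)
        if (Entry.second.Dirty)
          std::cout << "store vreg" << Entry.first << " from phys"
                    << Entry.second.PhysReg << " to its stack slot\n";
      LiveVirtRegs.clear();
    }

    int main() {
      std::map<unsigned, LiveReg> Live = {{1, {10, true}}, {2, {11, false}}};
      spillAll(Live); // only vreg1 is stored; vreg2 was clean
      return 0;
    }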
-      DEBUG(dbgs() << "  Spilling remaining registers before call.\n"); +      LLVM_DEBUG(dbgs() << "  Spilling remaining registers before call.\n");        spillAll(MI);      } @@ -1060,15 +1064,15 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {      VirtDead.clear();      if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) { -      DEBUG(dbgs() << "-- coalescing: " << MI); +      LLVM_DEBUG(dbgs() << "-- coalescing: " << MI);        Coalesced.push_back(&MI);      } else { -      DEBUG(dbgs() << "<< " << MI); +      LLVM_DEBUG(dbgs() << "<< " << MI);      }    }    // Spill all physical registers holding virtual registers now. -  DEBUG(dbgs() << "Spilling live registers at end of block.\n"); +  LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n");    spillAll(MBB.getFirstTerminator());    // Erase all the coalesced copies. We are delaying it until now because @@ -1077,13 +1081,13 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {      MBB.erase(MI);    NumCopies += Coalesced.size(); -  DEBUG(MBB.dump()); +  LLVM_DEBUG(MBB.dump());  }  /// Allocates registers for a function.  bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) { -  DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" -               << "********** Function: " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" +                    << "********** Function: " << MF.getName() << '\n');    MRI = &MF.getRegInfo();    const TargetSubtargetInfo &STI = MF.getSubtarget();    TRI = STI.getRegisterInfo(); diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index e492c481a540..3333e1f2fb8b 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -125,6 +125,12 @@ static cl::opt<bool> EnableDeferredSpilling(               "variable because of other evicted variables."),      cl::init(false)); +static cl::opt<unsigned> +    HugeSizeForSplit("huge-size-for-split", cl::Hidden, +                     cl::desc("A threshold of live range size which may cause " +                              "high compile time cost in global splitting."), +                     cl::init(5000)); +  // FIXME: Find a good default for this flag and remove the flag.  static cl::opt<unsigned>  CSRFirstTimeCost("regalloc-csr-first-time-cost", @@ -292,7 +298,7 @@ class RAGreedy : public MachineFunctionPass,    public:      using EvictorInfo =          std::pair<unsigned /* evictor */, unsigned /* physreg */>; -    using EvicteeInfo = llvm::MapVector<unsigned /* evictee */, EvictorInfo>; +    using EvicteeInfo = llvm::DenseMap<unsigned /* evictee */, EvictorInfo>;    private:      /// Each Vreg that has been evicted in the last stage of selectOrSplit will @@ -300,28 +306,28 @@ class RAGreedy : public MachineFunctionPass,      EvicteeInfo Evictees;    public: -    /// \brief Clear all eviction information. +    /// Clear all eviction information.      void clear() { Evictees.clear(); } -    /// \brief  Clear eviction information for the given evictee Vreg. +    ///  Clear eviction information for the given evictee Vreg.      /// E.g. when Vreg gets a new allocation, the old eviction info is no      /// longer relevant.      /// \param Evictee The evictee Vreg for whom we want to clear collected      /// eviction info.      void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); } -    /// \brief Track new eviction.
+    /// Track new eviction.      /// The Evictor vreg has evicted the Evictee vreg from Physreg. -    /// \praram PhysReg The phisical register Evictee was evicted from. -    /// \praram Evictor The evictor Vreg that evicted Evictee. -    /// \praram Evictee The evictee Vreg. +    /// \param PhysReg The physical register Evictee was evicted from. +    /// \param Evictor The evictor Vreg that evicted Evictee. +    /// \param Evictee The evictee Vreg.      void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {        Evictees[Evictee].first = Evictor;        Evictees[Evictee].second = PhysReg;      }      /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg. -    /// \praram Evictee The evictee vreg. +    /// \param Evictee The evictee vreg.      /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if      /// nobody has evicted Evictee from PhysReg.      EvictorInfo getEvictor(unsigned Evictee) { @@ -399,7 +405,7 @@ class RAGreedy : public MachineFunctionPass,    /// obtained from the TargetSubtargetInfo.    bool EnableLocalReassign; -  /// Enable or not the the consideration of the cost of local intervals created +  /// Enable or not the consideration of the cost of local intervals created    /// by a split candidate when choosing the best split candidate.    bool EnableAdvancedRASplitCost; @@ -448,13 +454,16 @@ private:    bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,                                    unsigned BBNumber,                                    const AllocationOrder &Order); +  bool splitCanCauseLocalSpill(unsigned VirtRegToSplit, +                               GlobalSplitCandidate &Cand, unsigned BBNumber, +                               const AllocationOrder &Order);    BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,                                       const AllocationOrder &Order,                                       bool *CanCauseEvictionChain);    bool calcCompactRegion(GlobalSplitCandidate&);    void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);    void calcGapWeights(unsigned, SmallVectorImpl<float>&); -  unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); +  unsigned canReassign(LiveInterval &VirtReg, unsigned PrevReg);    bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);    bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);    bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg, @@ -475,6 +484,7 @@ private:                      SmallVectorImpl<unsigned>&, unsigned = ~0u);    unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,                            SmallVectorImpl<unsigned>&); +  unsigned isSplitBenefitWorthCost(LiveInterval &VirtReg);    /// Calculate cost of region splitting.    unsigned calculateRegionSplitCost(LiveInterval &VirtReg,                                      AllocationOrder &Order, @@ -763,7 +773,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,    // preferred register.    
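Since nothing ever iterates Evictees in order, the container swap from MapVector to DenseMap above loses nothing and drops the side vector MapVector maintains for insertion order. A standalone sketch of the addEviction/getEvictor contract, with std::unordered_map standing in for llvm::DenseMap:

    #include <iostream>
    #include <unordered_map>
    #include <utility>

    using EvictorInfo = std::pair<unsigned /*evictor*/, unsigned /*physreg*/>;
    using EvicteeInfo = std::unordered_map<unsigned /*evictee*/, EvictorInfo>;

    EvicteeInfo Evictees;

    void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {
      Evictees[Evictee] = {Evictor, PhysReg};
    }

    // {0, 0} means nobody has evicted Evictee, as the doc comment above notes.
    EvictorInfo getEvictor(unsigned Evictee) {
      auto It = Evictees.find(Evictee);
      return It == Evictees.end() ? EvictorInfo{0, 0} : It->second;
    }

    int main() {
      addEviction(/*PhysReg=*/7, /*Evictor=*/101, /*Evictee=*/100);
      EvictorInfo Info = getEvictor(100);
      std::cout << "vreg100 evicted by vreg" << Info.first << " from physreg"
                << Info.second << '\n';
      return 0;
    }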
if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))      if (Order.isHint(Hint)) { -      DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n'); +      LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n');        EvictionCost MaxCost;        MaxCost.setBrokenHints(1);        if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { @@ -782,8 +792,8 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,    if (!Cost)      return PhysReg; -  DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " << Cost -               << '\n'); +  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost " +                    << Cost << '\n');    unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);    return CheapReg ? CheapReg : PhysReg;  } @@ -811,9 +821,9 @@ unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {        break;    }    if (PhysReg) -    DEBUG(dbgs() << "can reassign: " << VirtReg << " from " -          << printReg(PrevReg, TRI) << " to " << printReg(PhysReg, TRI) -          << '\n'); +    LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from " +                      << printReg(PrevReg, TRI) << " to " +                      << printReg(PhysReg, TRI) << '\n');    return PhysReg;  } @@ -840,7 +850,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,      return true;    if (A.weight > B.weight) { -    DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); +    LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');      return true;    }    return false; @@ -934,7 +944,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,    return true;  } -/// \brief Return true if all interferences between VirtReg and PhysReg between +/// Return true if all interferences between VirtReg and PhysReg between  /// Start and End can be evicted.  ///  /// \param VirtReg Live range that is about to be assigned. @@ -986,7 +996,7 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,    return true;  } -/// \brief Return tthe physical register that will be best +/// Return the physical register that will be the best  /// candidate for eviction by a local split interval that will be created  /// between Start and End.  /// @@ -1032,8 +1042,8 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,    if (!Cascade)      Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; -  DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI) -               << " interference: Cascade " << Cascade << '\n'); +  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI) +                    << " interference: Cascade " << Cascade << '\n');    // Collect all interfering virtregs first.    SmallVector<LiveInterval*, 8> Intfs; @@ -1104,8 +1114,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,      const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);      unsigned MinCost = RegClassInfo.getMinCost(RC);      if (MinCost >= CostPerUseLimit) { -      DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost -                   << ", no cheaper registers to be found.\n"); +      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " +                        << MinCost << ", no cheaper registers to be found.\n");        return 0;      } @@ -1113,7 +1123,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,      // the same cost. We don't need to look at them if they're too expensive.      
if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {        OrderLimit = RegClassInfo.getLastCostChange(RC); -      DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n"); +      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit +                        << " regs.\n");      }    } @@ -1124,9 +1135,10 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,      // The first use of a callee-saved register in a function has cost 1.      // Don't start using a CSR when the CostPerUseLimit is low.      if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) { -      DEBUG(dbgs() << printReg(PhysReg, TRI) << " would clobber CSR " -            << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) -            << '\n'); +      LLVM_DEBUG( +          dbgs() << printReg(PhysReg, TRI) << " would clobber CSR " +                 << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) +                 << '\n');        continue;      } @@ -1313,7 +1325,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {      // Perhaps iterating can enable more bundles?      SpillPlacer->iterate();    } -  DEBUG(dbgs() << ", v=" << Visited); +  LLVM_DEBUG(dbgs() << ", v=" << Visited);  }  /// calcCompactRegion - Compute the set of edge bundles that should be live @@ -1331,7 +1343,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {    // Compact regions don't correspond to any physreg.    Cand.reset(IntfCache, 0); -  DEBUG(dbgs() << "Compact region bundles"); +  LLVM_DEBUG(dbgs() << "Compact region bundles");    // Use the spill placer to determine the live bundles. GrowRegion pretends    // that all the through blocks have interference when PhysReg is unset. @@ -1340,7 +1352,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {    // The static split cost will be zero since Cand.Intf reports no interference.    BlockFrequency Cost;    if (!addSplitConstraints(Cand.Intf, Cost)) { -    DEBUG(dbgs() << ", none.\n"); +    LLVM_DEBUG(dbgs() << ", none.\n");      return false;    } @@ -1348,11 +1360,11 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {    SpillPlacer->finish();    if (!Cand.LiveBundles.any()) { -    DEBUG(dbgs() << ", none.\n"); +    LLVM_DEBUG(dbgs() << ", none.\n");      return false;    } -  DEBUG({ +  LLVM_DEBUG({      for (int i : Cand.LiveBundles.set_bits())        dbgs() << " EB#" << i;      dbgs() << ".\n"; @@ -1378,7 +1390,7 @@ BlockFrequency RAGreedy::calcSpillCost() {    return Cost;  } -/// \brief Check if splitting Evictee will create a local split interval in +/// Check if splitting Evictee will create a local split interval in  /// basic block number BBNumber that may cause a bad eviction chain. This is  /// intended to prevent bad eviction sequences like:  /// movl	%ebp, 8(%esp)           # 4-byte Spill @@ -1401,7 +1413,7 @@ BlockFrequency RAGreedy::calcSpillCost()  /// Evictee %0 is intended for region splitting with split candidate  /// physreg0 (the reg %0 was evicted from).  /// Region splitting creates a local interval because of interference with the -/// evictor %1 (normally region spliitting creates 2 interval, the "by reg" +/// evictor %1 (normally region splitting creates 2 intervals, the "by reg"  /// and "by stack" intervals and a local interval created when interference  /// occurs).  /// One of the split intervals ends up evicting %2 from physreg1. 
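The early-out splitCanCauseEvictionChain performs on this information can be modeled as a small predicate: the chain described above is only possible if the register the evictor occupies is either the split candidate itself or the register a future split artifact would evict from. A toy rendering of that guard (all register numbers are invented):

    #include <iostream>

    // Loose model of the early bail-out in splitCanCauseEvictionChain.
    bool splitMayCauseEvictionChain(unsigned EvictorPhysReg,
                                    unsigned CandPhysReg,
                                    unsigned FutureEvictedPhysReg) {
      // If the evictor's register is neither the candidate we split for nor
      // the register a split artifact would evict from, the chain cannot
      // close on itself.
      if (EvictorPhysReg != CandPhysReg &&
          EvictorPhysReg != FutureEvictedPhysReg)
        return false;
      return true;
    }

    int main() {
      // Splitting for physreg0 while the evictor sits in physreg0: risky.
      std::cout << splitMayCauseEvictionChain(0, 0, 1) << '\n'; // 1
      // Evictor in physreg2, unrelated to the split: no chain possible.
      std::cout << splitMayCauseEvictionChain(2, 0, 1) << '\n'; // 0
      return 0;
    }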
@@ -1427,7 +1439,7 @@ BlockFrequency RAGreedy::calcSpillCost()  ///                 we are splitting for and the interferences.  /// \param BBNumber The number of a BB for which the region split process will  ///                 create a local split interval. -/// \param Order    The phisical registers that may get evicted by a split +/// \param Order    The physical registers that may get evicted by a split  ///                 artifact of Evictee.  /// \return True if splitting Evictee may cause a bad eviction chain, false  /// otherwise. @@ -1448,8 +1460,8 @@ bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee,        getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee),                                 Cand.Intf.first(), Cand.Intf.last(), &MaxWeight); -  // The bad eviction chain occurs when either the split candidate the the -  // evited reg or one of the split artifact will evict the evicting reg. +  // The bad eviction chain occurs when either the split candidate is the +  // evicting reg or one of the split artifacts will evict the evicting reg.    if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg))      return false; @@ -1479,6 +1491,54 @@ bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee,    return true;  } +/// Check if splitting VirtRegToSplit will create a local split interval +/// in basic block number BBNumber that may cause a spill. +/// +/// \param VirtRegToSplit The register considered to be split. +/// \param Cand           The split candidate that determines the physical +///                       register we are splitting for and the interferences. +/// \param BBNumber       The number of a BB for which the region split process +///                       will create a local split interval. +/// \param Order          The physical registers that may get evicted by a +///                       split artifact of VirtRegToSplit. +/// \return True if splitting VirtRegToSplit may cause a spill, false +/// otherwise. +bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit, +                                       GlobalSplitCandidate &Cand, +                                       unsigned BBNumber, +                                       const AllocationOrder &Order) { +  Cand.Intf.moveToBlock(BBNumber); + +  // Check if the local interval will find a non-interfering assignment. +  for (auto PhysReg : Order.getOrder()) { +    if (!Matrix->checkInterference(Cand.Intf.first().getPrevIndex(), +                                   Cand.Intf.last(), PhysReg)) +      return false; +  } + +  // Check if the local interval will evict a cheaper interval. +  float CheapestEvictWeight = 0; +  unsigned FutureEvictedPhysReg = getCheapestEvicteeWeight( +      Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(), +      Cand.Intf.last(), &CheapestEvictWeight); + +  // Have we found an interval that can be evicted? +  if (FutureEvictedPhysReg) { +    VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI); +    float splitArtifactWeight = +        VRAI.futureWeight(LIS->getInterval(VirtRegToSplit), +                          Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); +    // Will the weight of the local interval be higher than the cheapest evictee +    // weight? If so, it will evict it and will not cause a spill. 
+    if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight) +      return false; +  } + +  // The local interval is not able to find a non-interfering assignment and +  // not able to evict a less worthy interval; therefore, it can cause a spill. +  return true; +} +  /// calcGlobalSplitCost - Return the global split cost of following the split  /// pattern in LiveBundles. This cost should be added to the local cost of the  /// interference pattern in SplitConstraints. @@ -1499,19 +1559,26 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,      Cand.Intf.moveToBlock(BC.Number);      // Check whether a local interval is going to be created during the region -    // split. -    if (EnableAdvancedRASplitCost && CanCauseEvictionChain && -        Cand.Intf.hasInterference() && BI.LiveIn && BI.LiveOut && RegIn && -        RegOut) { - -      if (splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) { -        // This interfernce cause our eviction from this assignment, we might -        // evict somebody else, add that cost. +    // split. Calculate advanced split cost (cost of local intervals) if the +    // option is enabled. +    if (EnableAdvancedRASplitCost && Cand.Intf.hasInterference() && BI.LiveIn && +        BI.LiveOut && RegIn && RegOut) { + +      if (CanCauseEvictionChain && +          splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) { +        // This interference causes our eviction from this assignment; we might +        // evict somebody else and eventually someone will spill, so add that cost.          // See splitCanCauseEvictionChain for detailed description of scenarios.          GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);          GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);          *CanCauseEvictionChain = true; + +      } else if (splitCanCauseLocalSpill(VirtRegToSplit, Cand, BC.Number, +                                         Order)) { +        // This interference causes the local interval to spill, so add that cost. +        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); +        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);        }      } @@ -1540,7 +1607,7 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,          // region split.          if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&              splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) { -          // This interfernce cause our eviction from this assignment, we might +          // This interference causes our eviction from this assignment, we might            // evict somebody else, add that cost.            // See splitCanCauseEvictionChain for detailed description of            // scenarios. @@ -1575,7 +1642,8 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,    // These are the intervals created for new global ranges. We may create more    // intervals for local ranges.    const unsigned NumGlobalIntvs = LREdit.size(); -  DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n"); +  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs +                    << " globals.\n");    assert(NumGlobalIntvs && "No global intervals configured");    // Isolate even single instructions when dealing with a proper sub-class. @@ -1612,7 +1680,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,      // Create separate intervals for isolated blocks with multiple uses.      
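The new splitCanCauseLocalSpill above reduces to two questions: can the would-be local interval find any interference-free register, and if not, is it heavy enough to evict the cheapest interfering interval instead of spilling? When both answers are no, calcGlobalSplitCost charges the block's frequency twice, approximating the store and reload a spill implies. A standalone sketch of that decision, with invented weights and an invented interference table:

    #include <iostream>
    #include <vector>

    int main() {
      // Invented inputs: per-physreg interference over the local range, the
      // weight the split artifact would get, and the cheapest evictee weight.
      std::vector<bool> Interferes = {true, true, true}; // no free register
      float SplitArtifactWeight = 1.5f;
      float CheapestEvictWeight = 2.0f;

      bool CanCauseSpill = true;
      // 1. Any interference-free register means no local spill.
      for (bool Busy : Interferes)
        if (!Busy) { CanCauseSpill = false; break; }
      // 2. Failing that, a heavier local interval evicts the cheapest
      //    interfering interval rather than spilling itself.
      if (CanCauseSpill && SplitArtifactWeight >= 0 &&
          SplitArtifactWeight > CheapestEvictWeight)
        CanCauseSpill = false;

      std::cout << (CanCauseSpill ? "local spill likely" : "no local spill")
                << '\n';
      return 0;
    }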
if (!IntvIn && !IntvOut) { -      DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n"); +      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");        if (SA->shouldSplitSingleBlock(BI, SingleInstrs))          SE->splitSingleBlock(BI);        continue; @@ -1694,8 +1762,8 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,      // blocks is strictly decreasing.      if (IntvMap[i] < NumGlobalIntvs) {        if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { -        DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks -                     << " blocks as original.\n"); +        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks +                          << " blocks as original.\n");          // Don't allow repeated splitting as a safeguard against looping.          setStage(Reg, RS_Split2);        } @@ -1710,8 +1778,21 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,      MF->verify(this, "After splitting live range around region");  } +// Global split has a high compile time cost, especially for a large live range. +// Return false for the case here where the potential benefit will never be +// worth the cost. +unsigned RAGreedy::isSplitBenefitWorthCost(LiveInterval &VirtReg) { +  MachineInstr *MI = MRI->getUniqueVRegDef(VirtReg.reg); +  if (MI && TII->isTriviallyReMaterializable(*MI, AA) && +      VirtReg.size() > HugeSizeForSplit) +    return false; +  return true; +} +  unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,                                    SmallVectorImpl<unsigned> &NewVRegs) { +  if (!isSplitBenefitWorthCost(VirtReg)) +    return 0;    unsigned NumCands = 0;    BlockFrequency SpillCost = calcSpillCost();    BlockFrequency BestCost; @@ -1726,8 +1807,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,      // No benefit from the compact region, our fallback will be per-block      // splitting. Make sure we find a solution that is cheaper than spilling.      BestCost = SpillCost; -    DEBUG(dbgs() << "Cost of isolating all blocks = "; -                 MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); +    LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = "; +               MBFI->printBlockFreq(dbgs(), BestCost) << '\n');    }    bool CanCauseEvictionChain = false; @@ -1790,13 +1871,13 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,      SpillPlacer->prepare(Cand.LiveBundles);      BlockFrequency Cost;      if (!addSplitConstraints(Cand.Intf, Cost)) { -      DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n"); +      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");        continue;      } -    DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = "; -                 MBFI->printBlockFreq(dbgs(), Cost)); +    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = "; +               MBFI->printBlockFreq(dbgs(), Cost));      if (Cost >= BestCost) { -      DEBUG({ +      LLVM_DEBUG({          if (BestCand == NoCand)            dbgs() << " worse than no bundles\n";          else @@ -1811,15 +1892,15 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,      // No live bundles, defer to splitSingleBlocks().      
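The isSplitBenefitWorthCost gate just added skips region splitting outright for a live range that is both trivially rematerializable and larger than the new huge-size-for-split threshold (default 5000): the spiller can recreate such a value cheaply, so the expensive global split search can never pay for itself. A toy version of the gate, with stand-ins for the size and remat queries:

    #include <iostream>

    constexpr unsigned HugeSizeForSplit = 5000; // the new cl::opt's default

    struct ToyLiveInterval {
      unsigned Size;  // stand-in for LiveInterval::size()
      bool Rematable; // stand-in for TII->isTriviallyReMaterializable(...)
    };

    bool isSplitBenefitWorthCost(const ToyLiveInterval &LI) {
      // A huge range whose value can simply be recomputed is never worth the
      // compile time of evaluating global split candidates.
      if (LI.Rematable && LI.Size > HugeSizeForSplit)
        return false;
      return true;
    }

    int main() {
      std::cout << isSplitBenefitWorthCost({6000, true}) << '\n';  // 0: skip
      std::cout << isSplitBenefitWorthCost({6000, false}) << '\n'; // 1: try
      return 0;
    }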
if (!Cand.LiveBundles.any()) { -      DEBUG(dbgs() << " no bundles.\n"); +      LLVM_DEBUG(dbgs() << " no bundles.\n");        continue;      }      bool HasEvictionChain = false;      Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain); -    DEBUG({ -      dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) -                                << " with bundles"; +    LLVM_DEBUG({ +      dbgs() << ", total = "; +      MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";        for (int i : Cand.LiveBundles.set_bits())          dbgs() << " EB#" << i;        dbgs() << ".\n"; @@ -1838,11 +1919,11 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,    if (CanCauseEvictionChain && BestCand != NoCand) {      // See splitCanCauseEvictionChain for detailed description of bad      // eviction chain scenarios. -    DEBUG(dbgs() << "Best split candidate of vreg " -                 << printReg(VirtReg.reg, TRI) << "  may "); +    LLVM_DEBUG(dbgs() << "Best split candidate of vreg " +                      << printReg(VirtReg.reg, TRI) << "  may ");      if (!(*CanCauseEvictionChain)) -      DEBUG(dbgs() << "not "); -    DEBUG(dbgs() << "cause bad eviction chain\n"); +      LLVM_DEBUG(dbgs() << "not "); +    LLVM_DEBUG(dbgs() << "cause bad eviction chain\n");    }    return BestCand; @@ -1865,8 +1946,8 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,      if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {        UsedCands.push_back(BestCand);        Cand.IntvIdx = SE->openIntv(); -      DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in " -                   << B << " bundles, intv " << Cand.IntvIdx << ".\n"); +      LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in " +                        << B << " bundles, intv " << Cand.IntvIdx << ".\n");        (void)B;      }    } @@ -1878,8 +1959,8 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,      if (unsigned B = Cand.getBundles(BundleCand, 0)) {        UsedCands.push_back(0);        Cand.IntvIdx = SE->openIntv(); -      DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv " -                   << Cand.IntvIdx << ".\n"); +      LLVM_DEBUG(dbgs() << "Split for compact region in " << B +                        << " bundles, intv " << Cand.IntvIdx << ".\n");        (void)B;      }    } @@ -1978,7 +2059,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,    if (Uses.size() <= 1)      return 0; -  DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); +  LLVM_DEBUG(dbgs() << "Split around " << Uses.size() +                    << " individual instrs.\n");    const TargetRegisterClass *SuperRC =        TRI->getLargestLegalSuperClass(CurRC, *MF); @@ -1993,7 +2075,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,            SuperRCNumAllocatableRegs ==                getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,                                                    TRI, RCI)) { -        DEBUG(dbgs() << "    skip:\t" << Uses[i] << '\t' << *MI); +        LLVM_DEBUG(dbgs() << "    skip:\t" << Uses[i] << '\t' << *MI);          continue;        }      SE->openIntv(); @@ -2003,7 +2085,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,    }    if (LREdit.empty()) { -    DEBUG(dbgs() << "All uses were copies.\n"); +    LLVM_DEBUG(dbgs() << "All uses were copies.\n");      return 0;    } @@ 
-2121,7 +2203,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,      return 0;    const unsigned NumGaps = Uses.size()-1; -  DEBUG({ +  LLVM_DEBUG({      dbgs() << "tryLocalSplit: ";      for (unsigned i = 0, e = Uses.size(); i != e; ++i)        dbgs() << ' ' << Uses[i]; @@ -2134,7 +2216,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,    if (Matrix->checkRegMaskInterference(VirtReg)) {      // Get regmask slots for the whole block.      ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); -    DEBUG(dbgs() << RMS.size() << " regmasks in block:"); +    LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");      // Constrain to VirtReg's live range.      unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),                                     Uses.front().getRegSlot()) - RMS.begin(); @@ -2148,14 +2230,15 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,        // overlap the live range.        if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)          break; -      DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]); +      LLVM_DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' +                        << Uses[i + 1]);        RegMaskGaps.push_back(i);        // Advance ri to the next gap. A regmask on one of the uses counts in        // both gaps.        while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))          ++ri;      } -    DEBUG(dbgs() << '\n'); +    LLVM_DEBUG(dbgs() << '\n');    }    // Since we allow local split results to be split again, there is a risk of @@ -2214,13 +2297,12 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,        const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;        const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; -      DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' -                   << Uses[SplitBefore] << '-' << Uses[SplitAfter] -                   << " i=" << MaxGap); +      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore] +                        << '-' << Uses[SplitAfter] << " i=" << MaxGap);        // Stop before the interval gets so big we wouldn't be making progress.        if (!LiveBefore && !LiveAfter) { -        DEBUG(dbgs() << " all\n"); +        LLVM_DEBUG(dbgs() << " all\n");          break;        }        // Should the interval be extended or shrunk? @@ -2245,12 +2327,12 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,              1);          // Would this split be possible to allocate?          // Never allocate all gaps, we wouldn't be making progress. -        DEBUG(dbgs() << " w=" << EstWeight); +        LLVM_DEBUG(dbgs() << " w=" << EstWeight);          if (EstWeight * Hysteresis >= MaxGap) {            Shrink = false;            float Diff = EstWeight - MaxGap;            if (Diff > BestDiff) { -            DEBUG(dbgs() << " (best)"); +            LLVM_DEBUG(dbgs() << " (best)");              BestDiff = Hysteresis * Diff;              BestBefore = SplitBefore;              BestAfter = SplitAfter; @@ -2261,7 +2343,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,        // Try to shrink.        if (Shrink) {          if (++SplitBefore < SplitAfter) { -          DEBUG(dbgs() << " shrink\n"); +          LLVM_DEBUG(dbgs() << " shrink\n");            // Recompute the max when necessary.            
if (GapWeight[SplitBefore - 1] >= MaxGap) {              MaxGap = GapWeight[SplitBefore]; @@ -2275,11 +2357,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,        // Try to extend the interval.        if (SplitAfter >= NumGaps) { -        DEBUG(dbgs() << " end\n"); +        LLVM_DEBUG(dbgs() << " end\n");          break;        } -      DEBUG(dbgs() << " extend\n"); +      LLVM_DEBUG(dbgs() << " extend\n");        MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);      }    } @@ -2288,9 +2370,9 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,    if (BestBefore == NumGaps)      return 0; -  DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] -               << '-' << Uses[BestAfter] << ", " << BestDiff -               << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); +  LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-' +                    << Uses[BestAfter] << ", " << BestDiff << ", " +                    << (BestAfter - BestBefore + 1) << " instrs\n");    LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);    SE->reset(LREdit); @@ -2310,14 +2392,14 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,    bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;    unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;    if (NewGaps >= NumGaps) { -    DEBUG(dbgs() << "Tagging non-progress ranges: "); +    LLVM_DEBUG(dbgs() << "Tagging non-progress ranges: ");      assert(!ProgressRequired && "Didn't make progress when it was required.");      for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)        if (IntvMap[i] == 1) {          setStage(LIS->getInterval(LREdit.get(i)), RS_Split2); -        DEBUG(dbgs() << printReg(LREdit.get(i))); +        LLVM_DEBUG(dbgs() << printReg(LREdit.get(i)));        } -    DEBUG(dbgs() << '\n'); +    LLVM_DEBUG(dbgs() << '\n');    }    ++NumLocalSplits; @@ -2410,7 +2492,7 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,      // chances are one would not be recolorable.      if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=          LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { -      DEBUG(dbgs() << "Early abort: too many interferences.\n"); +      LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");        CutOffInfo |= CO_Interf;        return false;      } @@ -2424,7 +2506,8 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,              MRI->getRegClass(Intf->reg) == CurRC) &&             !(hasTiedDef(MRI, VirtReg.reg) && !hasTiedDef(MRI, Intf->reg))) ||            FixedRegisters.count(Intf->reg)) { -        DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n"); +        LLVM_DEBUG( +            dbgs() << "Early abort: the interference is not recolorable.\n");          return false;        }        RecoloringCandidates.insert(Intf); @@ -2477,7 +2560,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,                                             SmallVectorImpl<unsigned> &NewVRegs,                                             SmallVirtRegSet &FixedRegisters,                                             unsigned Depth) { -  DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); +  LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');    // Ranges must be Done.    
assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&           "Last chance recoloring should really be last chance"); @@ -2486,7 +2569,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,    // for target with hundreds of registers.    // Indeed, in that case we may want to cut the search space earlier.    if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) { -    DEBUG(dbgs() << "Abort because max depth has been reached.\n"); +    LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");      CutOffInfo |= CO_Depth;      return ~0u;    } @@ -2503,8 +2586,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,    Order.rewind();    while (unsigned PhysReg = Order.next()) { -    DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " -                 << printReg(PhysReg, TRI) << '\n'); +    LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " +                      << printReg(PhysReg, TRI) << '\n');      RecoloringCandidates.clear();      VirtRegToPhysReg.clear();      CurrentNewVRegs.clear(); @@ -2512,7 +2595,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,      // It is only possible to recolor virtual register interference.      if (Matrix->checkInterference(VirtReg, PhysReg) >          LiveRegMatrix::IK_VirtReg) { -      DEBUG(dbgs() << "Some interferences are not with virtual registers.\n"); +      LLVM_DEBUG( +          dbgs() << "Some interferences are not with virtual registers.\n");        continue;      } @@ -2521,7 +2605,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,      // the interferences.      if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,                                      FixedRegisters)) { -      DEBUG(dbgs() << "Some interferences cannot be recolored.\n"); +      LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");        continue;      } @@ -2535,7 +2619,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,        unsigned ItVirtReg = (*It)->reg;        enqueue(RecoloringQueue, *It);        assert(VRM->hasPhys(ItVirtReg) && -             "Interferences are supposed to be with allocated vairables"); +             "Interferences are supposed to be with allocated variables");        // Record the current allocation.        VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); @@ -2563,8 +2647,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,        return PhysReg;      } -    DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " -                 << printReg(PhysReg, TRI) << '\n'); +    LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " +                      << printReg(PhysReg, TRI) << '\n');      // The recoloring attempt failed, undo the changes.      FixedRegisters = SaveFixedRegisters; @@ -2611,7 +2695,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,                                         unsigned Depth) {    while (!RecoloringQueue.empty()) {      LiveInterval *LI = dequeue(RecoloringQueue); -    DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); +    LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');      unsigned PhysReg;      PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);      // When splitting happens, the live-range may actually be empty. 
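Because tryLastChanceRecoloring re-enters selectOrSplitImpl for each interfering register, the surrounding code bounds the search with two cutoffs, a maximum recursion depth and a maximum interference count per candidate, recording which one fired in CutOffInfo. A skeletal sketch of such bounded recursion (the limits and the interference graph are invented; the real code also snapshots and rolls back assignments on failure):

    #include <iostream>
    #include <vector>

    constexpr unsigned MaxDepth = 5;  // cf. LastChanceRecoloringMaxDepth
    constexpr unsigned MaxInterf = 8; // cf. LastChanceRecoloringMaxInterference

    // Try to recolor VReg by recursively reallocating whatever it interferes
    // with, giving up as soon as either cutoff is reached.
    bool tryRecolor(unsigned VReg, unsigned Depth,
                    const std::vector<std::vector<unsigned>> &Interf) {
      if (Depth >= MaxDepth)
        return false; // depth cutoff (CO_Depth in the pass)
      if (Interf[VReg].size() >= MaxInterf)
        return false; // interference cutoff (CO_Interf in the pass)
      for (unsigned Other : Interf[VReg])
        if (!tryRecolor(Other, Depth + 1, Interf))
          return false;
      return true;
    }

    int main() {
      // vreg0 interferes with vreg1, which interferes with nothing.
      std::vector<std::vector<unsigned>> Interf = {{1}, {}};
      std::cout << (tryRecolor(0, 0, Interf) ? "recolored" : "gave up") << '\n';
      return 0;
    }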
@@ -2623,11 +2707,12 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,      if (!PhysReg) {        assert(LI->empty() && "Only empty live-range do not require a register"); -      DEBUG(dbgs() << "Recoloring of " << *LI << " succeeded. Empty LI.\n"); +      LLVM_DEBUG(dbgs() << "Recoloring of " << *LI +                        << " succeeded. Empty LI.\n");        continue;      } -    DEBUG(dbgs() << "Recoloring of " << *LI -                 << " succeeded with: " << printReg(PhysReg, TRI) << '\n'); +    LLVM_DEBUG(dbgs() << "Recoloring of " << *LI +                      << " succeeded with: " << printReg(PhysReg, TRI) << '\n');      Matrix->assign(*LI, PhysReg);      FixedRegisters.insert(LI->reg); @@ -2735,7 +2820,7 @@ void RAGreedy::initializeCSRCost() {      CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);  } -/// \brief Collect the hint info for \p Reg. +/// Collect the hint info for \p Reg.  /// The results are stored into \p Out.  /// \p Out is not cleared before being populated.  void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { @@ -2759,7 +2844,7 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {    }  } -/// \brief Using the given \p List, compute the cost of the broken hints if +/// Using the given \p List, compute the cost of the broken hints if  /// \p PhysReg was used.  /// \return The cost of \p List for \p PhysReg.  BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, @@ -2772,7 +2857,7 @@ BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,    return Cost;  } -/// \brief Using the register assigned to \p VirtReg, try to recolor +/// Using the register assigned to \p VirtReg, try to recolor  /// all the live ranges that are copy-related with \p VirtReg.  /// The recoloring is then propagated to all the live-ranges that have  /// been recolored and so on, until no more copies can be coalesced or @@ -2794,8 +2879,8 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {    Visited.insert(Reg);    RecoloringCandidates.push_back(Reg); -  DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI) << '(' -               << printReg(PhysReg, TRI) << ")\n"); +  LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI) +                    << '(' << printReg(PhysReg, TRI) << ")\n");    do {      Reg = RecoloringCandidates.pop_back_val(); @@ -2816,8 +2901,8 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {                                  Matrix->checkInterference(LI, PhysReg)))        continue; -    DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI) -                 << ") is recolorable.\n"); +    LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI) +                      << ") is recolorable.\n");      // Gather the hint info.      Info.clear(); @@ -2825,19 +2910,20 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {      // Check if recoloring the live-range will increase the cost of the      // non-identity copies.      
if (CurrPhys != PhysReg) { -      DEBUG(dbgs() << "Checking profitability:\n"); +      LLVM_DEBUG(dbgs() << "Checking profitability:\n");        BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);        BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg); -      DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency() -                   << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n'); +      LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency() +                        << "\nNew Cost: " << NewCopiesCost.getFrequency() +                        << '\n');        if (OldCopiesCost < NewCopiesCost) { -        DEBUG(dbgs() << "=> Not profitable.\n"); +        LLVM_DEBUG(dbgs() << "=> Not profitable.\n");          continue;        }        // At this point, the cost is either cheaper or equal. If it is        // equal, we consider this is profitable because it may expose        // more recoloring opportunities. -      DEBUG(dbgs() << "=> Profitable.\n"); +      LLVM_DEBUG(dbgs() << "=> Profitable.\n");        // Recolor the live-range.        Matrix->unassign(LI);        Matrix->assign(LI, PhysReg); @@ -2851,7 +2937,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {    } while (!RecoloringCandidates.empty());  } -/// \brief Try to recolor broken hints. +/// Try to recolor broken hints.  /// Broken hints may be repaired by recoloring when an evicted variable  /// freed up a register for a larger live-range.  /// Consider the following example: @@ -2925,8 +3011,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,    }    LiveRangeStage Stage = getStage(VirtReg); -  DEBUG(dbgs() << StageName[Stage] -               << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); +  LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade " +                    << ExtraRegInfo[VirtReg.reg].Cascade << '\n');    // Try to evict a less worthy live range, but only for ranges from the primary    // queue. The RS_Split ranges already failed to do this, and they should not @@ -2955,7 +3041,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,    // This gives a better picture of the interference to split around.    if (Stage < RS_Split) {      setStage(VirtReg, RS_Split); -    DEBUG(dbgs() << "wait for second round\n"); +    LLVM_DEBUG(dbgs() << "wait for second round\n");      NewVRegs.push_back(VirtReg.reg);      return 0;    } @@ -2984,7 +3070,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,      // We would need a deep integration with the spiller to do the      // right thing here. Anyway, that is still good for early testing.      
setStage(VirtReg, RS_Memory); -    DEBUG(dbgs() << "Do as if this register is in memory\n"); +    LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");      NewVRegs.push_back(VirtReg.reg);    } else {      NamedRegionTimer T("spill", "Spiller", TimerGroupName, @@ -3070,8 +3156,8 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,  }  bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { -  DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" -               << "********** Function: " << mf.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" +                    << "********** Function: " << mf.getName() << '\n');    MF = &mf;    TRI = MF->getSubtarget().getRegisterInfo(); @@ -3106,7 +3192,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {    calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); -  DEBUG(LIS->dump()); +  LLVM_DEBUG(LIS->dump());    SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));    SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI)); diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index 69a879701fae..c19001c8403d 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -62,6 +62,7 @@  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/Module.h"  #include "llvm/MC/MCRegisterInfo.h" @@ -159,25 +160,25 @@ private:    /// always available for the remat of all the siblings of the original reg.    SmallPtrSet<MachineInstr *, 32> DeadRemats; -  /// \brief Finds the initial set of vreg intervals to allocate. +  /// Finds the initial set of vreg intervals to allocate.    void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS); -  /// \brief Constructs an initial graph. +  /// Constructs an initial graph.    void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller); -  /// \brief Spill the given VReg. +  /// Spill the given VReg.    void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals,                   MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,                   Spiller &VRegSpiller); -  /// \brief Given a solved PBQP problem maps this solution back to a register +  /// Given a solved PBQP problem maps this solution back to a register    /// assignment.    bool mapPBQPToRegAlloc(const PBQPRAGraph &G,                           const PBQP::Solution &Solution,                           VirtRegMap &VRM,                           Spiller &VRegSpiller); -  /// \brief Postprocessing before final spilling. Sets basic block "live in" +  /// Postprocessing before final spilling. Sets basic block "live in"    /// variables.    void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,                       VirtRegMap &VRM) const; @@ -187,7 +188,7 @@ private:  char RegAllocPBQP::ID = 0; -/// @brief Set spill costs for each node in the PBQP reg-alloc graph. +/// Set spill costs for each node in the PBQP reg-alloc graph.  class SpillCosts : public PBQPRAConstraint {  public:    void apply(PBQPRAGraph &G) override { @@ -211,7 +212,7 @@ public:    }  }; -/// @brief Add interference edges between overlapping vregs. +/// Add interference edges between overlapping vregs.  
class Interference : public PBQPRAConstraint {  private:    using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *; @@ -561,16 +562,7 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,      unsigned Reg = TargetRegisterInfo::index2VirtReg(I);      if (MRI.reg_nodbg_empty(Reg))        continue; -    LiveInterval &LI = LIS.getInterval(Reg); - -    // If this live interval is non-empty we will use pbqp to allocate it. -    // Empty intervals we allocate in a simple post-processing stage in -    // finalizeAlloc. -    if (!LI.empty()) { -      VRegsToAlloc.insert(LI.reg); -    } else { -      EmptyIntervalVRegs.insert(LI.reg); -    } +    VRegsToAlloc.insert(Reg);    }  } @@ -594,13 +586,24 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,    std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end()); +  std::map<unsigned, std::vector<unsigned>> VRegAllowedMap; +    while (!Worklist.empty()) {      unsigned VReg = Worklist.back();      Worklist.pop_back(); -    const TargetRegisterClass *TRC = MRI.getRegClass(VReg);      LiveInterval &VRegLI = LIS.getInterval(VReg); +    // If this is an empty interval, move it to the EmptyIntervalVRegs set, +    // then continue. +    if (VRegLI.empty()) { +      EmptyIntervalVRegs.insert(VRegLI.reg); +      VRegsToAlloc.erase(VRegLI.reg); +      continue; +    } + +    const TargetRegisterClass *TRC = MRI.getRegClass(VReg); +      // Record any overlaps with regmask operands.      BitVector RegMaskOverlaps;      LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps); @@ -639,8 +642,22 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,        spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);        Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());        continue; +    } else +      VRegAllowedMap[VReg] = std::move(VRegAllowed); +  } + +  for (auto &KV : VRegAllowedMap) { +    auto VReg = KV.first; + +    // Move empty intervals to the EmptyIntervalVRegs set. +    if (LIS.getInterval(VReg).empty()) { +      EmptyIntervalVRegs.insert(VReg); +      VRegsToAlloc.erase(VReg); +      continue;      } +    auto &VRegAllowed = KV.second; +      PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);      // Tweak cost of callee saved registers, as using them forces spilling and @@ -668,8 +685,8 @@ void RegAllocPBQP::spillVReg(unsigned VReg,    const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();    (void)TRI; -  DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: " -               << LRE.getParent().weight << ", New vregs: "); +  LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: " +                    << LRE.getParent().weight << ", New vregs: ");    // Copy any newly inserted live intervals into the list of regs to    // allocate. 
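After the rework above, initializeGraph filters empty intervals while building VRegAllowedMap and then creates one PBQP node per remaining vreg: a cost vector whose index 0 is the spill option and whose entries 1..N correspond to the allowed physregs, with the trailing hunk biasing the callee-saved entries. A rough standalone model of that cost-vector layout (the concrete costs, and which register counts as a CSR, are invented):

    #include <iostream>
    #include <vector>

    int main() {
      // Allowed physregs for one vreg; index 0 of the cost vector is the
      // spill option, index I+1 corresponds to VRegAllowed[I].
      std::vector<unsigned> VRegAllowed = {3, 4, 7};
      std::vector<float> NodeCosts(VRegAllowed.size() + 1, 0.0f);

      NodeCosts[0] = 10.0f; // invented spill cost for this node
      for (unsigned I = 0; I != VRegAllowed.size(); ++I) {
        bool IsCalleeSaved = (VRegAllowed[I] == 7); // pretend 7 is a CSR
        // Penalize CSRs slightly: using one forces a save and restore.
        NodeCosts[I + 1] = IsCalleeSaved ? 1.0f : 0.0f;
      }

      for (unsigned I = 0; I != NodeCosts.size(); ++I)
        std::cout << (I == 0 ? "spill" : "reg") << " option, cost "
                  << NodeCosts[I] << '\n';
      return 0;
    }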
@@ -677,11 +694,11 @@ void RegAllocPBQP::spillVReg(unsigned VReg,         I != E; ++I) {      const LiveInterval &LI = LIS.getInterval(*I);      assert(!LI.empty() && "Empty spill range."); -    DEBUG(dbgs() << printReg(LI.reg, &TRI) << " "); +    LLVM_DEBUG(dbgs() << printReg(LI.reg, &TRI) << " ");      VRegsToAlloc.insert(LI.reg);    } -  DEBUG(dbgs() << ")\n"); +  LLVM_DEBUG(dbgs() << ")\n");  }  bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, @@ -707,8 +724,8 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,      if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {        unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1]; -      DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> " -            << TRI.getName(PReg) << "\n"); +      LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> " +                        << TRI.getName(PReg) << "\n");        assert(PReg != 0 && "Invalid preg selected.");        VRM.assignVirt2Phys(VReg, PReg);      } else { @@ -784,7 +801,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {    MF.getRegInfo().freezeReservedRegs(MF); -  DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n"); +  LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");    // Allocator main loop:    // @@ -819,7 +836,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {      unsigned Round = 0;      while (!PBQPAllocComplete) { -      DEBUG(dbgs() << "  PBQP Regalloc round " << Round << ":\n"); +      LLVM_DEBUG(dbgs() << "  PBQP Regalloc round " << Round << ":\n");        PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));        initializeGraph(G, VRM, *VRegSpiller); @@ -833,8 +850,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {                                      ".pbqpgraph";          std::error_code EC;          raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); -        DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" -              << GraphFileName << "\"\n"); +        LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" +                          << GraphFileName << "\"\n");          G.dump(OS);        }  #endif @@ -851,7 +868,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {    VRegsToAlloc.clear();    EmptyIntervalVRegs.clear(); -  DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n"); +  LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");    return true;  } diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp index f49ea25bbf35..f1c442ac38ae 100644 --- a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -36,11 +36,8 @@ using namespace llvm;  STATISTIC(NumCSROpt,            "Number of functions optimized for callee saved registers"); -namespace llvm { -void initializeRegUsageInfoCollectorPass(PassRegistry &); -} -  namespace { +  class RegUsageInfoCollector : public MachineFunctionPass {  public:    RegUsageInfoCollector() : MachineFunctionPass(ID) { @@ -52,12 +49,21 @@ public:      return "Register Usage Information Collector Pass";    } -  void getAnalysisUsage(AnalysisUsage &AU) const override; +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.addRequired<PhysicalRegisterUsageInfo>(); +    AU.setPreservesAll(); +    MachineFunctionPass::getAnalysisUsage(AU); +  }    bool 
runOnMachineFunction(MachineFunction &MF) override; +  // Call determineCalleeSaves and then also set the bits for subregs and +  // fully saved superregs. +  static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF); +    static char ID;  }; +  } // end of anonymous namespace  char RegUsageInfoCollector::ID = 0; @@ -72,36 +78,32 @@ FunctionPass *llvm::createRegUsageInfoCollector() {    return new RegUsageInfoCollector();  } -void RegUsageInfoCollector::getAnalysisUsage(AnalysisUsage &AU) const { -  AU.addRequired<PhysicalRegisterUsageInfo>(); -  AU.setPreservesAll(); -  MachineFunctionPass::getAnalysisUsage(AU); -} -  bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {    MachineRegisterInfo *MRI = &MF.getRegInfo();    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();    const TargetMachine &TM = MF.getTarget(); -  DEBUG(dbgs() << " -------------------- " << getPassName() -               << " -------------------- \n"); -  DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n"); +  LLVM_DEBUG(dbgs() << " -------------------- " << getPassName() +                    << " -------------------- \n"); +  LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");    std::vector<uint32_t> RegMask;    // Compute the size of the bit vector to represent all the registers.    // The bit vector is broken into 32-bit chunks, thus takes the ceil of    // the number of registers divided by 32 for the size. -  unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; -  RegMask.resize(RegMaskSize, 0xFFFFFFFF); +  unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); +  RegMask.resize(RegMaskSize, ~((uint32_t)0));    const Function &F = MF.getFunction(); -  PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>(); +  PhysicalRegisterUsageInfo &PRUI = getAnalysis<PhysicalRegisterUsageInfo>(); +  PRUI.setTargetMachine(TM); -  PRUI->setTargetMachine(&TM); +  LLVM_DEBUG(dbgs() << "Clobbered Registers: "); -  DEBUG(dbgs() << "Clobbered Registers: "); +  BitVector SavedRegs; +  computeCalleeSavedRegs(SavedRegs, MF);    const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask();    auto SetRegAsDefined = [&RegMask] (unsigned Reg) { @@ -110,42 +112,82 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {    // Scan all the physical registers. When a register is defined in the current    // function set it and all the aliasing registers as defined in the regmask.    for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) { -    // If a register is in the UsedPhysRegsMask set then mark it as defined. -    // All it's aliases will also be in the set, so we can skip setting -    // as defined all the aliases here. -    if (UsedPhysRegsMask.test(PReg)) { -      SetRegAsDefined(PReg); +    // Don't count registers that are saved and restored. +    if (SavedRegs.test(PReg))        continue; -    }      // If a register is defined by an instruction mark it as defined together -    // with all it's aliases. +    // with all it's unsaved aliases.      if (!MRI->def_empty(PReg)) {        for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI) -        SetRegAsDefined(*AI); +        if (!SavedRegs.test(*AI)) +          SetRegAsDefined(*AI); +      continue;      } +    // If a register is in the UsedPhysRegsMask set then mark it as defined. +    // All clobbered aliases will also be in the set, so we can skip setting +    // as defined all the aliases here. 
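The mask the collector builds above is the standard LLVM regmask encoding: MachineOperand::getRegMaskSize computes the same (NumRegs + 31) / 32 ceiling the removed line spelled out, and the vector starts all-ones, meaning every register is presumed preserved. A small stand-alone model of that bit layout; the helper names are illustrative, but the bit test matches what MachineOperand::clobbersPhysReg checks:

```c++
#include <cstdint>
#include <vector>

// One bit per physical register, packed into 32-bit words. A set bit means
// "preserved across the call"; bits are cleared as registers are clobbered.
std::vector<uint32_t> makeAllPreservedMask(unsigned NumRegs) {
  unsigned RegMaskSize = (NumRegs + 31) / 32; // ceil(NumRegs / 32)
  return std::vector<uint32_t>(RegMaskSize, ~((uint32_t)0));
}

void setRegAsDefined(std::vector<uint32_t> &RegMask, unsigned Reg) {
  RegMask[Reg / 32] &= ~(((uint32_t)1) << (Reg % 32)); // clear = clobbered
}

bool clobbersPhysReg(const std::vector<uint32_t> &RegMask, unsigned Reg) {
  return !(RegMask[Reg / 32] & (((uint32_t)1) << (Reg % 32)));
}
```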
+    if (UsedPhysRegsMask.test(PReg)) +      SetRegAsDefined(PReg);    } -  if (!TargetFrameLowering::isSafeForNoCSROpt(F)) { -    const uint32_t *CallPreservedMask = -        TRI->getCallPreservedMask(MF, F.getCallingConv()); -    if (CallPreservedMask) { -      // Set callee saved register as preserved. -      for (unsigned i = 0; i < RegMaskSize; ++i) -        RegMask[i] = RegMask[i] | CallPreservedMask[i]; -    } -  } else { +  if (TargetFrameLowering::isSafeForNoCSROpt(F)) {      ++NumCSROpt; -    DEBUG(dbgs() << MF.getName() -                 << " function optimized for not having CSR.\n"); +    LLVM_DEBUG(dbgs() << MF.getName() +                      << " function optimized for not having CSR.\n");    }    for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)      if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg)) -      DEBUG(dbgs() << printReg(PReg, TRI) << " "); +      LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " "); -  DEBUG(dbgs() << " \n----------------------------------------\n"); +  LLVM_DEBUG(dbgs() << " \n----------------------------------------\n"); -  PRUI->storeUpdateRegUsageInfo(&F, std::move(RegMask)); +  PRUI.storeUpdateRegUsageInfo(F, RegMask);    return false;  } + +void RegUsageInfoCollector:: +computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) { +  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); +  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + +  // Target will return the set of registers that it saves/restores as needed. +  SavedRegs.clear(); +  TFI.determineCalleeSaves(MF, SavedRegs); + +  // Insert subregs. +  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); +  for (unsigned i = 0; CSRegs[i]; ++i) { +    unsigned Reg = CSRegs[i]; +    if (SavedRegs.test(Reg)) +      for (MCSubRegIterator SR(Reg, &TRI, false); SR.isValid(); ++SR) +        SavedRegs.set(*SR); +  } + +  // Insert any register fully saved via subregisters. +  for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) { +    if (SavedRegs.test(PReg)) +      continue; + +    // Check if PReg is fully covered by its subregs. +    bool CoveredBySubRegs = false; +    for (const TargetRegisterClass *RC : TRI.regclasses()) +      if (RC->CoveredBySubRegs && RC->contains(PReg)) { +        CoveredBySubRegs = true; +        break; +      } +    if (!CoveredBySubRegs) +      continue; + +    // Add PReg to SavedRegs if all subregs are saved. 
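computeCalleeSavedRegs, shown above and continued below, performs a two-step closure over what determineCalleeSaves reports: saving a register implies its subregisters are saved, and a register whose lanes are entirely covered by saved subregisters counts as saved as well. A compact model of the same closure on a toy register file; the sub-register table here is made up for illustration:

```c++
#include <map>
#include <set>
#include <vector>

typedef unsigned Reg;

void closeOverSubAndSuperRegs(std::set<Reg> &Saved,
                              const std::map<Reg, std::vector<Reg>> &SubRegs) {
  // Step 1: insert the subregisters of every saved register.
  std::set<Reg> Roots(Saved);
  for (Reg R : Roots) {
    auto It = SubRegs.find(R);
    if (It != SubRegs.end())
      for (Reg S : It->second)
        Saved.insert(S);
  }

  // Step 2: insert any register fully covered by saved subregisters.
  for (const auto &Entry : SubRegs) {
    Reg R = Entry.first;
    const std::vector<Reg> &Subs = Entry.second;
    if (Saved.count(R) || Subs.empty())
      continue;
    bool AllSubRegsSaved = true;
    for (Reg S : Subs)
      if (!Saved.count(S)) {
        AllSubRegsSaved = false;
        break;
      }
    if (AllSubRegsSaved)
      Saved.insert(R);
  }
}
```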
+    bool AllSubRegsSaved = true; +    for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR) +      if (!SavedRegs.test(*SR)) { +        AllSubRegsSaved = false; +        break; +      } +    if (AllSubRegsSaved) +      SavedRegs.set(PReg); +  } +} diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp index 5b12d00e126f..256de295821d 100644 --- a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -34,10 +34,6 @@  #include <map>  #include <string> -namespace llvm { -void initializeRegUsageInfoPropagationPassPass(PassRegistry &); -} -  using namespace llvm;  #define DEBUG_TYPE "ip-regalloc" @@ -45,54 +41,56 @@ using namespace llvm;  #define RUIP_NAME "Register Usage Information Propagation"  namespace { -class RegUsageInfoPropagationPass : public MachineFunctionPass { +class RegUsageInfoPropagation : public MachineFunctionPass {  public: -  RegUsageInfoPropagationPass() : MachineFunctionPass(ID) { +  RegUsageInfoPropagation() : MachineFunctionPass(ID) {      PassRegistry &Registry = *PassRegistry::getPassRegistry(); -    initializeRegUsageInfoPropagationPassPass(Registry); +    initializeRegUsageInfoPropagationPass(Registry);    }    StringRef getPassName() const override { return RUIP_NAME; }    bool runOnMachineFunction(MachineFunction &MF) override; -  void getAnalysisUsage(AnalysisUsage &AU) const override; +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.addRequired<PhysicalRegisterUsageInfo>(); +    AU.setPreservesAll(); +    MachineFunctionPass::getAnalysisUsage(AU); +  }    static char ID;  private: -  static void setRegMask(MachineInstr &MI, const uint32_t *RegMask) { +  static void setRegMask(MachineInstr &MI, ArrayRef<uint32_t> RegMask) { +    assert(RegMask.size() == +           MachineOperand::getRegMaskSize(MI.getParent()->getParent() +                                          ->getRegInfo().getTargetRegisterInfo() +                                          ->getNumRegs()) +           && "expected register mask size");      for (MachineOperand &MO : MI.operands()) {        if (MO.isRegMask()) -        MO.setRegMask(RegMask); +        MO.setRegMask(RegMask.data());      }    }  }; +  } // end of anonymous namespace -char RegUsageInfoPropagationPass::ID = 0; -INITIALIZE_PASS_BEGIN(RegUsageInfoPropagationPass, "reg-usage-propagation", +INITIALIZE_PASS_BEGIN(RegUsageInfoPropagation, "reg-usage-propagation",                        RUIP_NAME, false, false)  INITIALIZE_PASS_DEPENDENCY(PhysicalRegisterUsageInfo) -INITIALIZE_PASS_END(RegUsageInfoPropagationPass, "reg-usage-propagation", +INITIALIZE_PASS_END(RegUsageInfoPropagation, "reg-usage-propagation",                      RUIP_NAME, false, false) -FunctionPass *llvm::createRegUsageInfoPropPass() { -  return new RegUsageInfoPropagationPass(); -} - -void RegUsageInfoPropagationPass::getAnalysisUsage(AnalysisUsage &AU) const { -  AU.addRequired<PhysicalRegisterUsageInfo>(); -  AU.setPreservesAll(); -  MachineFunctionPass::getAnalysisUsage(AU); -} +char RegUsageInfoPropagation::ID = 0;  // Assumes call instructions have a single reference to a function. 
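The findCalledFunction helper defined just below resolves that single reference along two paths: a direct global-value operand, or an external symbol name looked up in the module. A stand-alone sketch of the same two-path lookup; ToyFunction and ToyCallOperand are illustrative stand-ins, not LLVM types:

```c++
#include <map>
#include <string>

struct ToyFunction { std::string Name; };

struct ToyCallOperand {
  const ToyFunction *Global = nullptr; // direct reference, may be null
  const char *Symbol = nullptr;        // external symbol name, may be null
};

const ToyFunction *
findCalledFunction(const std::map<std::string, ToyFunction> &Module,
                   const ToyCallOperand &MO) {
  if (MO.Global)
    return MO.Global; // mirrors the dyn_cast<Function>(MO.getGlobal()) path
  if (MO.Symbol) {    // mirrors the M.getFunction(MO.getSymbolName()) path
    auto It = Module.find(MO.Symbol);
    return It == Module.end() ? nullptr : &It->second;
  }
  return nullptr;
}
```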
-static const Function *findCalledFunction(const Module &M, MachineInstr &MI) { -  for (MachineOperand &MO : MI.operands()) { +static const Function *findCalledFunction(const Module &M, +                                          const MachineInstr &MI) { +  for (const MachineOperand &MO : MI.operands()) {      if (MO.isGlobal()) -      return dyn_cast<Function>(MO.getGlobal()); +      return dyn_cast<const Function>(MO.getGlobal());      if (MO.isSymbol())        return M.getFunction(MO.getSymbolName()); @@ -101,13 +99,13 @@ static const Function *findCalledFunction(const Module &M, MachineInstr &MI) {    return nullptr;  } -bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) { -  const Module *M = MF.getFunction().getParent(); +bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) { +  const Module &M = *MF.getFunction().getParent();    PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>(); -  DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName() -               << " ++++++++++++++++++++  \n"); -  DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n"); +  LLVM_DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName() +                    << " ++++++++++++++++++++  \n"); +  LLVM_DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n");    const MachineFrameInfo &MFI = MF.getFrameInfo();    if (!MFI.hasCalls() && !MFI.hasTailCall()) @@ -119,30 +117,37 @@ bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) {      for (MachineInstr &MI : MBB) {        if (!MI.isCall())          continue; -      DEBUG(dbgs() -            << "Call Instruction Before Register Usage Info Propagation : \n"); -      DEBUG(dbgs() << MI << "\n"); - -      auto UpdateRegMask = [&](const Function *F) { -        const auto *RegMask = PRUI->getRegUsageInfo(F); -        if (!RegMask) +      LLVM_DEBUG( +          dbgs() +          << "Call Instruction Before Register Usage Info Propagation : \n"); +      LLVM_DEBUG(dbgs() << MI << "\n"); + +      auto UpdateRegMask = [&](const Function &F) { +        const ArrayRef<uint32_t> RegMask = PRUI->getRegUsageInfo(F); +        if (RegMask.empty())            return; -        setRegMask(MI, &(*RegMask)[0]); +        setRegMask(MI, RegMask);          Changed = true;        }; -      if (const Function *F = findCalledFunction(*M, MI)) { -        UpdateRegMask(F); +      if (const Function *F = findCalledFunction(M, MI)) { +        UpdateRegMask(*F);        } else { -        DEBUG(dbgs() << "Failed to find call target function\n"); +        LLVM_DEBUG(dbgs() << "Failed to find call target function\n");        } -      DEBUG(dbgs() << "Call Instruction After Register Usage Info Propagation : " -            << MI << '\n'); +      LLVM_DEBUG( +          dbgs() << "Call Instruction After Register Usage Info Propagation : " +                 << MI << '\n');      }    } -  DEBUG(dbgs() << " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" -                  "++++++ \n"); +  LLVM_DEBUG( +      dbgs() << " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +                "++++++ \n");    return Changed;  } + +FunctionPass *llvm::createRegUsageInfoPropPass() { +  return new RegUsageInfoPropagation(); +} diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp index b0eeb81f583e..add8faec97d4 100644 --- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -49,9 
+49,6 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {    if (MF->getSubtarget().getRegisterInfo() != TRI) {      TRI = MF->getSubtarget().getRegisterInfo();      RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); -    unsigned NumPSets = TRI->getNumRegPressureSets(); -    PSetLimits.reset(new unsigned[NumPSets]); -    std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0);      Update = true;    } @@ -80,8 +77,12 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {    }    // Invalidate cached information from previous function. -  if (Update) +  if (Update) { +    unsigned NumPSets = TRI->getNumRegPressureSets(); +    PSetLimits.reset(new unsigned[NumPSets]); +    std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0);      ++Tag; +  }  }  /// compute - Compute the preferred allocation order for RC with reserved @@ -150,7 +151,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {    RCI.MinCost = uint8_t(MinCost);    RCI.LastCostChange = LastCostChange; -  DEBUG({ +  LLVM_DEBUG({      dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = [";      for (unsigned I = 0; I != RCI.NumRegs; ++I)        dbgs() << ' ' << printReg(RCI.Order[I], TRI); diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index 00a2e93c71ca..cad13a60efd2 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -115,11 +115,11 @@ namespace {      /// checked for smaller live intervals.      bool ShrinkMainRange; -    /// \brief True if the coalescer should aggressively coalesce global copies +    /// True if the coalescer should aggressively coalesce global copies      /// in favor of keeping local copies.      bool JoinGlobalCopies; -    /// \brief True if the coalescer should aggressively coalesce fall-thru +    /// True if the coalescer should aggressively coalesce fall-thru      /// blocks exclusively containing copies.      bool JoinSplitEdges; @@ -162,7 +162,7 @@ namespace {      /// was successfully coalesced away. If it is not currently possible to      /// coalesce this interval, but it may be possible if other things get      /// coalesced, then it returns true by reference in 'Again'. -    bool joinCopy(MachineInstr *TheCopy, bool &Again); +    bool joinCopy(MachineInstr *CopyMI, bool &Again);      /// Attempt to join these two intervals.  On failure, this      /// returns false.  The output "SrcInt" will not have been modified, so we @@ -233,9 +233,11 @@ namespace {      void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,                        MachineOperand &MO, unsigned SubRegIdx); -    /// Handle copies of undef values. -    /// Returns true if @p CopyMI was a copy of an undef value and eliminated. -    bool eliminateUndefCopy(MachineInstr *CopyMI); +    /// Handle copies of undef values. If the undef value is an incoming +    /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF. +    /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise, +    /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point). +    MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);      /// Check whether or not we should apply the terminal rule on the      /// destination (Dst) of \p Copy. @@ -568,7 +570,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,    // in IntB, we can merge them.    
if (ValS+1 != BS) return false; -  DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI)); +  LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI));    SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;    // We are about to delete CopyMI, so need to remove it as the 'instruction @@ -587,6 +589,13 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,    // Do the same for the subregister segments.    for (LiveInterval::SubRange &S : IntB.subranges()) { +    // Check for SubRange Segments of the form [1234r,1234d:0) which can be +    // removed to prevent creating bogus SubRange Segments. +    LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx); +    if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) { +      S.removeSegment(*SS, true); +      continue; +    }      VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);      S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));      VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot()); @@ -594,7 +603,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,        S.MergeValueNumberInto(SubBValNo, SubValSNo);    } -  DEBUG(dbgs() << "   result = " << IntB << '\n'); +  LLVM_DEBUG(dbgs() << "   result = " << IntB << '\n');    // If the source instruction was killing the source register before the    // merge, unset the isKill marker given the live range has been extended. @@ -603,11 +612,21 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,      ValSEndInst->getOperand(UIdx).setIsKill(false);    } -  // Rewrite the copy. If the copy instruction was killing the destination -  // register before the merge, find the last use and trim the live range. That -  // will also add the isKill marker. +  // Rewrite the copy.    CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); -  if (AS->end == CopyIdx) +  // If the copy instruction was killing the destination register or any +  // subrange before the merge trim the live range. +  bool RecomputeLiveRange = AS->end == CopyIdx; +  if (!RecomputeLiveRange) { +    for (LiveInterval::SubRange &S : IntA.subranges()) { +      LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx); +      if (SS != S.end() && SS->end == CopyIdx) { +        RecomputeLiveRange = true; +        break; +      } +    } +  } +  if (RecomputeLiveRange)      shrinkToUses(&IntA);    ++numExtends; @@ -641,7 +660,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,    return false;  } -/// Copy segements with value number @p SrcValNo from liverange @p Src to live +/// Copy segments with value number @p SrcValNo from liverange @p Src to live  /// range @Dst and use value number @p DstValNo there.  static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,                                   const LiveRange &Src, const VNInfo *SrcValNo) { @@ -742,8 +761,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,        return false;    } -  DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' -               << *DefMI); +  LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' +                    << *DefMI);    // At this point we have decided that it is legal to do this    // transformation.  Start by commuting the instruction. 
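The adjustCopiesBackFrom hunk above keys its new subrange cleanup off SlotIndex::isSameInstr: a segment of the form [1234r,1234d:0) lives entirely inside one instruction, and extending over it would manufacture bogus subrange liveness. A toy model of that predicate and the filtering step, using a simplified index encoding (instruction number times four, plus slot) rather than LLVM's SlotIndex representation:

```c++
#include <algorithm>
#include <vector>

// Toy SlotIndex: four slots per instruction, so two indexes belong to the
// same instruction iff they agree once the slot is divided away.
static bool isSameInstr(unsigned A, unsigned B) { return A / 4 == B / 4; }

struct Segment { unsigned Start, End; };

// Drop segments that begin and end inside a single instruction.
void dropIntraInstructionSegments(std::vector<Segment> &SubRange) {
  SubRange.erase(std::remove_if(SubRange.begin(), SubRange.end(),
                                [](const Segment &S) {
                                  return isSameInstr(S.Start, S.End);
                                }),
                 SubRange.end());
}
```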
@@ -812,7 +831,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,      VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);      if (!DVNI)        continue; -    DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); +    LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);      assert(DVNI->def == DefIdx);      BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);      for (LiveInterval::SubRange &S : IntB.subranges()) { @@ -853,11 +872,11 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,    BValNo->def = AValNo->def;    addSegmentsWithValNo(IntB, BValNo, IntA, AValNo); -  DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); +  LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');    LIS->removeVRegDefAt(IntA, AValNo->def); -  DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n'); +  LLVM_DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');    ++numCommutes;    return true;  } @@ -989,13 +1008,24 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,    if (CopyLeftBB && CopyLeftBB->succ_size() > 1)      return false; -  // Now ok to move copy. +  // Now (almost sure it's) ok to move copy.    if (CopyLeftBB) { -    DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to " -                 << printMBBReference(*CopyLeftBB) << '\t' << CopyMI); +    // Position in CopyLeftBB where we should insert new copy. +    auto InsPos = CopyLeftBB->getFirstTerminator(); + +    // Make sure that B isn't referenced in the terminators (if any) at the end +    // of the predecessor since we're about to insert a new definition of B +    // before them. +    if (InsPos != CopyLeftBB->end()) { +      SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true); +      if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB))) +        return false; +    } + +    LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to " +                      << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);      // Insert new copy to CopyLeftBB. -    auto InsPos = CopyLeftBB->getFirstTerminator();      MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),                                        TII->get(TargetOpcode::COPY), IntB.reg)                                    .addReg(IntA.reg); @@ -1010,8 +1040,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,      // the deleted list.      ErasedInstrs.erase(NewCopyMI);    } else { -    DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from " -                 << printMBBReference(MBB) << '\t' << CopyMI); +    LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from " +                      << printMBBReference(MBB) << '\t' << CopyMI);    }    // Remove CopyMI. @@ -1039,6 +1069,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,      BValNo->markUnused();      LIS->extendToIndices(SR, EndPoints);    } +  // If any dead defs were extended, truncate them. +  shrinkToUses(&IntB);    // Finally, update the live-range of IntA.    shrinkToUses(&IntA); @@ -1174,7 +1206,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,         I != E; ++I) {      MachineOperand &MO = CopyMI->getOperand(I);      if (MO.isReg()) { -      assert(MO.isImplicit() && "No explicit operands after implict operands."); +      assert(MO.isImplicit() && "No explicit operands after implicit operands.");        // Discard VReg implicit defs.        
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))          ImplicitOps.push_back(MO); @@ -1220,6 +1252,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,      // Update machine operands and add flags.      updateRegDefsUses(DstReg, DstReg, DstIdx);      NewMI.getOperand(0).setSubReg(NewIdx); +    // updateRegDefUses can add an "undef" flag to the definition, since +    // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make +    // sure that "undef" is not set. +    if (NewIdx == 0) +      NewMI.getOperand(0).setIsUndef(false);      // Add dead subregister definitions if we are defining the whole register      // but only part of it is live.      // This could happen if the rematerialization instruction is rematerializing @@ -1266,8 +1303,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,        bool UpdatedSubRanges = false;        for (LiveInterval::SubRange &SR : DstInt.subranges()) {          if ((SR.LaneMask & DstMask).none()) { -          DEBUG(dbgs() << "Removing undefined SubRange " -                << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); +          LLVM_DEBUG(dbgs() +                     << "Removing undefined SubRange " +                     << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");            // VNI is in ValNo - remove any segments in this SubRange that have this ValNo            if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {              SR.removeValNo(RmValNo); @@ -1299,7 +1337,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,      // %1 = somedef ; %1 GR8      // dead ECX = remat ; implicit-def CL      // = somedef %1 ; %1 GR8 -    // %1 will see the inteferences with CL but not with CH since +    // %1 will see the interferences with CL but not with CH since      // no live-ranges would have been created for ECX.      // Fix that!      SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); @@ -1324,7 +1362,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,          LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());    } -  DEBUG(dbgs() << "Remat: " << NewMI); +  LLVM_DEBUG(dbgs() << "Remat: " << NewMI);    ++NumReMats;    // The source interval can become smaller because we removed a use. @@ -1339,7 +1377,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,          // Move the debug value directly after the def of the rematerialized          // value in DstReg.          MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI); -        DEBUG(dbgs() << "\t\tupdated: " << *UseMI); +        LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);        }      }      eliminateDeadDefs(); @@ -1348,9 +1386,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,    return true;  } -bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { -  // ProcessImpicitDefs may leave some copies of <undef> values, it only removes -  // local variables. When we have a copy like: +MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { +  // ProcessImplicitDefs may leave some copies of <undef> values, it only +  // removes local variables. 
When we have a copy like:    //    //   %1 = COPY undef %2    // @@ -1372,16 +1410,34 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {        if ((SR.LaneMask & SrcMask).none())          continue;        if (SR.liveAt(Idx)) -        return false; +        return nullptr;      }    } else if (SrcLI.liveAt(Idx)) -    return false; +    return nullptr; -  DEBUG(dbgs() << "\tEliminating copy of <undef> value\n"); - -  // Remove any DstReg segments starting at the instruction. +  // If the undef copy defines a live-out value (i.e. an input to a PHI def), +  // then replace it with an IMPLICIT_DEF.    LiveInterval &DstLI = LIS->getInterval(DstReg);    SlotIndex RegIndex = Idx.getRegSlot(); +  LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex); +  assert(Seg != nullptr && "No segment for defining instruction"); +  if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) { +    if (V->isPHIDef()) { +      CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); +      for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) { +        MachineOperand &MO = CopyMI->getOperand(i-1); +        if (MO.isReg() && MO.isUse()) +          CopyMI->RemoveOperand(i-1); +      } +      LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an " +                           "implicit def\n"); +      return CopyMI; +    } +  } + +  // Remove any DstReg segments starting at the instruction. +  LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n"); +    // Remove value or merge with previous one in case of a subregister def.    if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {      VNInfo *VNI = DstLI.getVNInfoAt(RegIndex); @@ -1424,7 +1480,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {      if (isLive)        continue;      MO.setIsUndef(true); -    DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI); +    LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);    }    // A def of a subregister may be a use of the other subregisters, so @@ -1437,7 +1493,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {        MO.setIsUndef(true);    LIS->shrinkToUses(&DstLI); -  return true; +  return CopyMI;  }  void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, @@ -1539,12 +1595,12 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,          MO.substVirtReg(DstReg, SubIdx, *TRI);      } -    DEBUG({ -        dbgs() << "\t\tupdated: "; -        if (!UseMI->isDebugValue()) -          dbgs() << LIS->getInstructionIndex(*UseMI) << "\t"; -        dbgs() << *UseMI; -      }); +    LLVM_DEBUG({ +      dbgs() << "\t\tupdated: "; +      if (!UseMI->isDebugValue()) +        dbgs() << LIS->getInstructionIndex(*UseMI) << "\t"; +      dbgs() << *UseMI; +    });    }  } @@ -1553,7 +1609,7 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {    // reserved register. This doesn't increase register pressure, so it is    // always beneficial.    
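Stepping back to the eliminateUndefCopy change above: when the undef copy feeds a PHI, the instruction is rewritten in place to IMPLICIT_DEF, and its use operands are stripped by walking the operand list backwards, since removing an operand invalidates the indices of everything after it. The same backward-removal idiom on a plain vector, with Op as a toy operand type:

```c++
#include <vector>

struct Op { bool IsReg; bool IsUse; };

// Walk backwards so that erasing element i-1 never disturbs the indices of
// elements we have yet to visit.
void dropUseOperands(std::vector<Op> &Operands) {
  for (unsigned i = Operands.size(); i != 0; --i) {
    const Op &MO = Operands[i - 1];
    if (MO.IsReg && MO.IsUse)
      Operands.erase(Operands.begin() + (i - 1));
  }
}
```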
if (!MRI->isReserved(CP.getDstReg())) { -    DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); +    LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");      return false;    } @@ -1561,17 +1617,18 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {    if (JoinVInt.containsOneValue())      return true; -  DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n"); +  LLVM_DEBUG( +      dbgs() << "\tCannot join complex intervals into reserved register.\n");    return false;  }  bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {    Again = false; -  DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI); +  LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);    CoalescerPair CP(*TRI);    if (!CP.setRegisters(CopyMI)) { -    DEBUG(dbgs() << "\tNot coalescable.\n"); +    LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");      return false;    } @@ -1586,7 +1643,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {      }      if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,                               CP.getNewRC(), *LIS)) { -      DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); +      LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");        return false;      }    } @@ -1595,16 +1652,21 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {    // sometimes dead copies slip through, and we can't generate invalid live    // ranges.    if (!CP.isPhys() && CopyMI->allDefsAreDead()) { -    DEBUG(dbgs() << "\tCopy is dead.\n"); +    LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");      DeadDefs.push_back(CopyMI);      eliminateDeadDefs();      return true;    }    // Eliminate undefs. -  if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) { -    deleteInstr(CopyMI); -    return false;  // Not coalescable. +  if (!CP.isPhys()) { +    // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce. +    if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) { +      if (UndefMI->isImplicitDef()) +        return false; +      deleteInstr(CopyMI); +      return false;  // Not coalescable. +    }    }    // Coalesced copies are normally removed immediately, but transformations @@ -1612,7 +1674,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {    // When that happens, just join the values and remove the copy.    if (CP.getSrcReg() == CP.getDstReg()) {      LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); -    DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); +    LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');      const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);      LiveQueryResult LRQ = LI.Query(CopyIdx);      if (VNInfo *DefVNI = LRQ.valueDefined()) { @@ -1629,7 +1691,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {            S.MergeValueNumberInto(SDefVNI, SReadVNI);          }        } -      DEBUG(dbgs() << "\tMerged values:          " << LI << '\n'); +      LLVM_DEBUG(dbgs() << "\tMerged values:          " << LI << '\n');      }      deleteInstr(CopyMI);      return true; @@ -1637,9 +1699,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {    // Enforce policies.    
if (CP.isPhys()) { -    DEBUG(dbgs() << "\tConsidering merging " << printReg(CP.getSrcReg(), TRI) -                 << " with " << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) -                 << '\n'); +    LLVM_DEBUG(dbgs() << "\tConsidering merging " +                      << printReg(CP.getSrcReg(), TRI) << " with " +                      << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');      if (!canJoinPhys(CP)) {        // Before giving up coalescing, if definition of source is defined by        // trivial computation, try rematerializing it. @@ -1656,7 +1718,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {                             LIS->getInterval(CP.getDstReg()).size())        CP.flip(); -    DEBUG({ +    LLVM_DEBUG({        dbgs() << "\tConsidering merging to "               << TRI->getRegClassName(CP.getNewRC()) << " with ";        if (CP.getDstIdx() && CP.getSrcIdx()) @@ -1692,7 +1754,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {        if (adjustCopiesBackFrom(CP, CopyMI) ||            removeCopyByCommutingDef(CP, CopyMI)) {          deleteInstr(CopyMI); -        DEBUG(dbgs() << "\tTrivial!\n"); +        LLVM_DEBUG(dbgs() << "\tTrivial!\n");          return true;        }      } @@ -1704,7 +1766,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {          return true;      // Otherwise, we are unable to join the intervals. -    DEBUG(dbgs() << "\tInterference!\n"); +    LLVM_DEBUG(dbgs() << "\tInterference!\n");      Again = true;  // May be possible to coalesce later.      return false;    } @@ -1738,8 +1800,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {      for (LiveInterval::SubRange &S : LI.subranges()) {        if ((S.LaneMask & ShrinkMask).none())          continue; -      DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) -                   << ")\n"); +      LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) +                        << ")\n");        LIS->shrinkToUses(S, LI.reg);      }      LI.removeEmptySubRanges(); @@ -1756,7 +1818,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {    // Update regalloc hint.    
TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); -  DEBUG({ +  LLVM_DEBUG({      dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())             << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';      dbgs() << "\tResult = "; @@ -1777,7 +1839,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {    assert(CP.isPhys() && "Must be a physreg copy");    assert(MRI->isReserved(DstReg) && "Not a reserved register");    LiveInterval &RHS = LIS->getInterval(SrcReg); -  DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); +  LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');    assert(RHS.containsOneValue() && "Invalid join with reserved register"); @@ -1796,7 +1858,8 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {            return false;        }        if (RHS.overlaps(LIS->getRegUnit(*UI))) { -        DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI) << '\n'); +        LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI) +                          << '\n');          return false;        }      } @@ -1805,7 +1868,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {      BitVector RegMaskUsable;      if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&          !RegMaskUsable.test(DstReg)) { -      DEBUG(dbgs() << "\t\tRegMask interference\n"); +      LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");        return false;      }    } @@ -1835,12 +1898,12 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {      //   %y = def      //   ...      if (!MRI->hasOneNonDBGUse(SrcReg)) { -      DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); +      LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");        return false;      }      if (!LIS->intervalIsInOneMBB(RHS)) { -      DEBUG(dbgs() << "\t\tComplex control flow!\n"); +      LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");        return false;      } @@ -1858,7 +1921,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {             SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {          MachineInstr *MI = LIS->getInstructionFromIndex(SI);          if (MI->readsRegister(DstReg, TRI)) { -          DEBUG(dbgs() << "\t\tInterference (read): " << *MI); +          LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);            return false;          }        } @@ -1866,8 +1929,8 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {      // We're going to remove the copy which defines a physical reserved      // register, so remove its valno, etc. -    DEBUG(dbgs() << "\t\tRemoving phys reg def of " << printReg(DstReg, TRI) -          << " at " << CopyRegIdx << "\n"); +    LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of " +                      << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");      LIS->removePhysRegDefAt(DstReg, CopyRegIdx);      // Create a new dead def at the new def location. @@ -2057,6 +2120,13 @@ class JoinVals {      /// True once Pruned above has been computed.      bool PrunedComputed = false; +    /// True if this value is determined to be identical to OtherVNI +    /// (in valuesIdentical). This is used with CR_Erase where the erased +    /// copy is redundant, i.e. the source value is already the same as +    /// the destination. In such cases the subranges need to be updated +    /// properly. See comment at pruneSubRegValues for more info. 
+    bool Identical = false; +      Val() = default;      bool isAnalyzed() const { return WriteLanes.any(); } @@ -2073,7 +2143,7 @@ class JoinVals {    /// Find the ultimate value that VNI was copied from.    std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const; -  bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const; +  bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;    /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].    /// Return a conflict resolution when possible, but leave the hard cases as @@ -2191,17 +2261,17 @@ LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)  std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(      const VNInfo *VNI) const { -  unsigned Reg = this->Reg; +  unsigned TrackReg = Reg;    while (!VNI->isPHIDef()) {      SlotIndex Def = VNI->def;      MachineInstr *MI = Indexes->getInstructionFromIndex(Def);      assert(MI && "No defining instruction");      if (!MI->isFullCopy()) -      return std::make_pair(VNI, Reg); +      return std::make_pair(VNI, TrackReg);      unsigned SrcReg = MI->getOperand(1).getReg();      if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) -      return std::make_pair(VNI, Reg); +      return std::make_pair(VNI, TrackReg);      const LiveInterval &LI = LIS->getInterval(SrcReg);      const VNInfo *ValueIn; @@ -2210,7 +2280,8 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(        LiveQueryResult LRQ = LI.Query(Def);        ValueIn = LRQ.valueIn();      } else { -      // Query subranges. Pick the first matching one. +      // Query subranges. Ensure that all matching ones take us to the same def +      // (allowing some of them to be undef).        ValueIn = nullptr;        for (const LiveInterval::SubRange &S : LI.subranges()) {          // Transform lanemask to a mask in the joined live interval. @@ -2218,16 +2289,27 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(          if ((SMask & LaneMask).none())            continue;          LiveQueryResult LRQ = S.Query(Def); -        ValueIn = LRQ.valueIn(); -        break; +        if (!ValueIn) { +          ValueIn = LRQ.valueIn(); +          continue; +        } +        if (LRQ.valueIn() && ValueIn != LRQ.valueIn()) +          return std::make_pair(VNI, TrackReg);        }      } -    if (ValueIn == nullptr) -      break; +    if (ValueIn == nullptr) { +      // Reaching an undefined value is legitimate, for example: +      // +      // 1   undef %0.sub1 = ...  ;; %0.sub0 == undef +      // 2   %1 = COPY %0         ;; %1 is defined here. +      // 3   %0 = COPY %1         ;; Now %0.sub0 has a definition, +      //                          ;; but it's equivalent to "undef". +      return std::make_pair(nullptr, SrcReg); +    }      VNI = ValueIn; -    Reg = SrcReg; +    TrackReg = SrcReg;    } -  return std::make_pair(VNI, Reg); +  return std::make_pair(VNI, TrackReg);  }  bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, @@ -2235,12 +2317,17 @@ bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,    const VNInfo *Orig0;    unsigned Reg0;    std::tie(Orig0, Reg0) = followCopyChain(Value0); -  if (Orig0 == Value1) +  if (Orig0 == Value1 && Reg0 == Other.Reg)      return true;    const VNInfo *Orig1;    unsigned Reg1;    std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); +  // If both values are undefined, and the source registers are the same +  // register, the values are identical. 
Filter out cases where only one +  // value is defined. +  if (Orig0 == nullptr || Orig1 == nullptr) +    return Orig0 == Orig1 && Reg0 == Reg1;    // The values are equal if they are defined at the same place and use the    // same register. Note that we cannot compare VNInfos directly as some of @@ -2375,9 +2462,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {    // to erase the IMPLICIT_DEF instruction.    if (OtherV.ErasableImplicitDef && DefMI &&        DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) { -    DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def -                 << " extends into " << printMBBReference(*DefMI->getParent()) -                 << ", keeping it.\n"); +    LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def +                      << " extends into " +                      << printMBBReference(*DefMI->getParent()) +                      << ", keeping it.\n");      OtherV.ErasableImplicitDef = false;    } @@ -2415,9 +2503,11 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {    //   %other = COPY %ext    //   %this  = COPY %ext <-- Erase this copy    // -  if (DefMI->isFullCopy() && !CP.isPartial() -      && valuesIdentical(VNI, V.OtherVNI, Other)) +  if (DefMI->isFullCopy() && !CP.isPartial() && +      valuesIdentical(VNI, V.OtherVNI, Other)) { +    V.Identical = true;      return CR_Erase; +  }    // If the lanes written by this instruction were all undef in OtherVNI, it is    // still safe to join the live ranges. This can't be done with a simple value @@ -2487,11 +2577,11 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {      assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");      assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");      Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; -    DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@' -                 << LR.getValNumInfo(ValNo)->def << " into " -                 << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@' -                 << V.OtherVNI->def << " --> @" -                 << NewVNInfo[Assignments[ValNo]]->def << '\n'); +    LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@' +                      << LR.getValNumInfo(ValNo)->def << " into " +                      << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@' +                      << V.OtherVNI->def << " --> @" +                      << NewVNInfo[Assignments[ValNo]]->def << '\n');      break;    case CR_Replace:    case CR_Unresolved: { @@ -2517,8 +2607,8 @@ bool JoinVals::mapValues(JoinVals &Other) {    for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {      computeAssignment(i, Other);      if (Vals[i].Resolution == CR_Impossible) { -      DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i -                   << '@' << LR.getValNumInfo(i)->def << '\n'); +      LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i +                        << '@' << LR.getValNumInfo(i)->def << '\n');        return false;      }    } @@ -2540,13 +2630,13 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,      // lanes escape the block.      
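The followCopyChain and valuesIdentical hunks above walk back through chains of full copies to the ultimate defining value, now tracking the register the final value lives in and tolerating chains that bottom out at an undefined value. A reduced sketch of that walk; ToyVal and DefOfReg are illustrative stand-ins that collapse LLVM's per-index liveness queries into a single def-per-register map:

```c++
#include <map>
#include <utility>

struct ToyVal {
  bool IsPHI = false;
  bool IsFullCopy = false;
  unsigned CopySrcReg = 0;
};

std::pair<const ToyVal *, unsigned>
followCopyChain(const std::map<unsigned, ToyVal> &DefOfReg,
                const ToyVal *VNI, unsigned Reg) {
  unsigned TrackReg = Reg;
  while (!VNI->IsPHI) {
    if (!VNI->IsFullCopy)
      return {VNI, TrackReg}; // a real def; stop here
    unsigned SrcReg = VNI->CopySrcReg;
    auto It = DefOfReg.find(SrcReg);
    if (It == DefOfReg.end())
      return {nullptr, SrcReg}; // reaching an undefined value is legitimate
    VNI = &It->second;
    TrackReg = SrcReg;
  }
  return {VNI, TrackReg};
}
```

Two values are then considered identical only when both walks end at the same value in the same register, or both end at undef from the same source register.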
SlotIndex End = OtherI->end;      if (End >= MBBEnd) { -      DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':' -                   << OtherI->valno->id << '@' << OtherI->start << '\n'); +      LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':' +                        << OtherI->valno->id << '@' << OtherI->start << '\n');        return false;      } -    DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':' -                 << OtherI->valno->id << '@' << OtherI->start -                 << " to " << End << '\n'); +    LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':' +                      << OtherI->valno->id << '@' << OtherI->start << " to " +                      << End << '\n');      // A dead def is not a problem.      if (End.isDead())        break; @@ -2567,7 +2657,7 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,  bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,                           LaneBitmask Lanes) const { -  if (MI.isDebugValue()) +  if (MI.isDebugInstr())      return false;    for (const MachineOperand &MO : MI.operands()) {      if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg) @@ -2587,8 +2677,8 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {      assert(V.Resolution != CR_Impossible && "Unresolvable conflict");      if (V.Resolution != CR_Unresolved)        continue; -    DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i -                 << '@' << LR.getValNumInfo(i)->def << '\n'); +    LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@' +                      << LR.getValNumInfo(i)->def << '\n');      if (SubRangeJoin)        return false; @@ -2625,7 +2715,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {      while (true) {        assert(MI != MBB->end() && "Bad LastMI");        if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) { -        DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); +        LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);          return false;        }        // LastMI is the last instruction to use the current value. @@ -2698,8 +2788,8 @@ void JoinVals::pruneValues(JoinVals &Other,          if (!EraseImpDef)            EndPoints.push_back(Def);        } -      DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def -                   << ": " << Other.LR << '\n'); +      LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def +                        << ": " << Other.LR << '\n');        break;      }      case CR_Erase: @@ -2710,8 +2800,8 @@ void JoinVals::pruneValues(JoinVals &Other,          // computeAssignment(), the value that was originally copied could have          // been replaced.          LIS->pruneValue(LR, Def, &EndPoints); -        DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at " -                     << Def << ": " << LR << '\n'); +        LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at " +                          << Def << ": " << LR << '\n');        }        break;      case CR_Unresolved: @@ -2721,21 +2811,65 @@ void JoinVals::pruneValues(JoinVals &Other,    }  } +/// Consider the following situation when coalescing the copy between +/// %31 and %45 at 800. (The vertical lines represent live range segments.) 
+/// +///                              Main range         Subrange 0004 (sub2) +///                              %31    %45           %31    %45 +///  544    %45 = COPY %28               +                    + +///                                      | v1                 | v1 +///  560B bb.1:                          +                    + +///  624        = %45.sub2               | v2                 | v2 +///  800    %31 = COPY %45        +      +             +      + +///                               | v0                 | v0 +///  816    %31.sub1 = ...        +                    | +///  880    %30 = COPY %31        | v1                 + +///  928    %45 = COPY %30        |      +                    + +///                               |      | v0                 | v0  <--+ +///  992B   ; backedge -> bb.1    |      +                    +        | +/// 1040        = %31.sub0        +                                    | +///                                                 This value must remain +///                                                 live-out! +/// +/// Assuming that %31 is coalesced into %45, the copy at 928 becomes +/// redundant, since it copies the value from %45 back into it. The +/// conflict resolution for the main range determines that %45.v0 is +/// to be erased, which is ok since %31.v1 is identical to it. +/// The problem happens with the subrange for sub2: it has to be live +/// on exit from the block, but since 928 was actually a point of +/// definition of %45.sub2, %45.sub2 was not live immediately prior +/// to that definition. As a result, when 928 was erased, the value v0 +/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an +/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2, +/// providing an incorrect value to the use at 624. +/// +/// Since the main-range values %31.v1 and %45.v0 were proved to be +/// identical, the corresponding values in subranges must also be the +/// same. A redundant copy is removed because it's not needed, and not +/// because it copied an undefined value, so any liveness that originated +/// from that copy cannot disappear. When pruning a value that started +/// at the removed copy, the corresponding identical value must be +/// extended to replace it.  void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {    // Look for values being erased.    bool DidPrune = false;    for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { +    Val &V = Vals[i];      // We should trigger in all cases in which eraseInstrs() does something.      // match what eraseInstrs() is doing, print a message so -    if (Vals[i].Resolution != CR_Erase && -        (Vals[i].Resolution != CR_Keep || !Vals[i].ErasableImplicitDef || -         !Vals[i].Pruned)) +    if (V.Resolution != CR_Erase && +        (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))        continue;      // Check subranges at the point where the copy will be removed.      SlotIndex Def = LR.getValNumInfo(i)->def; +    SlotIndex OtherDef; +    if (V.Identical) +      OtherDef = V.OtherVNI->def; +      // Print message so mismatches with eraseInstrs() can be diagnosed. 
-    DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def << '\n'); +    LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def +                      << '\n');      for (LiveInterval::SubRange &S : LI.subranges()) {        LiveQueryResult Q = S.Query(Def); @@ -2743,19 +2877,28 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {        // copied and we must remove that subrange value as well.        VNInfo *ValueOut = Q.valueOutOrDead();        if (ValueOut != nullptr && Q.valueIn() == nullptr) { -        DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) -                     << " at " << Def << "\n"); -        LIS->pruneValue(S, Def, nullptr); +        LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) +                          << " at " << Def << "\n"); +        SmallVector<SlotIndex,8> EndPoints; +        LIS->pruneValue(S, Def, &EndPoints);          DidPrune = true;          // Mark value number as unused.          ValueOut->markUnused(); + +        if (V.Identical && S.Query(OtherDef).valueOut()) { +          // If V is identical to V.OtherVNI (and S was live at OtherDef), +          // then we can't simply prune V from S. V needs to be replaced +          // with V.OtherVNI. +          LIS->extendToIndices(S, EndPoints); +        }          continue;        }        // If a subrange ends at the copy, then a value was copied but only        // partially used later. Shrink the subregister range appropriately.        if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) { -        DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask) -                     << " at " << Def << "\n"); +        LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane " +                          << PrintLaneMask(S.LaneMask) << " at " << Def +                          << "\n");          ShrinkMask |= S.LaneMask;        }      } @@ -2867,7 +3010,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,              std::prev(S)->end = NewEnd;          }        } -      DEBUG({ +      LLVM_DEBUG({          dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';          if (LI != nullptr)            dbgs() << "\t\t  LHS = " << *LI << '\n'; @@ -2885,7 +3028,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,            ShrinkRegs.push_back(Reg);        }        ErasedInstrs.insert(MI); -      DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); +      LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);        LIS->RemoveMachineInstrFromMaps(*MI);        MI->eraseFromParent();        break; @@ -2940,13 +3083,14 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,    LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),                NewVNInfo); -  DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); +  LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask) +                    << ' ' << LRange << "\n");    if (EndPoints.empty())      return;    // Recompute the parts of the live range we had to remove because of    // CR_Replace conflicts. 
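The pruneSubRegValues fix above follows a prune, collect endpoints, re-extend shape: liveness is cut at the erased def, the points the removed segments used to reach are remembered, and when the erased value was proven identical to a surviving one, extendToIndices regrows the subrange to exactly those endpoints. A toy model of the prune step over half-open segments of a single value; the real pruneValue works on LiveRange, this is only the shape:

```c++
#include <algorithm>
#include <vector>

struct Seg { unsigned Start, End; }; // half-open [Start, End)

std::vector<unsigned> pruneValueAt(std::vector<Seg> &Range, unsigned Def) {
  std::vector<unsigned> EndPoints;
  for (Seg &S : Range)
    if (S.Start <= Def && Def < S.End) {
      EndPoints.push_back(S.End); // liveness used to reach here
      S.End = Def;                // cut at the erased def
    }
  // Discard segments the cut emptied out.
  Range.erase(std::remove_if(Range.begin(), Range.end(),
                             [](const Seg &S) { return S.Start >= S.End; }),
              Range.end());
  return EndPoints; // what extendToIndices would restore
}
```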
-  DEBUG({
+  LLVM_DEBUG({
     dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
     for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
       dbgs() << EndPoints[i];
@@ -2985,9 +3129,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
   JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
                    NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
-  DEBUG(dbgs() << "\t\tRHS = " << RHS
-               << "\n\t\tLHS = " << LHS
-               << '\n');
+  LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');

   // First compute NewVNInfo and the simple value mappings.
   // Detect impossible conflicts early.
@@ -3018,8 +3160,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
         R.LaneMask = Mask;
       }
     }
-    DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg())
-                 << ' ' << LHS << '\n');
+    LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
+                      << '\n');

     // Determine lanemasks of RHS in the coalesced register and merge subranges.
     unsigned SrcIdx = CP.getSrcIdx();
@@ -3034,7 +3176,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
         mergeSubRangeInto(LHS, R, Mask, CP);
       }
     }
-    DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
+    LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");

     // Pruning implicit defs from subranges may result in the main range
     // having stale segments.
@@ -3072,7 +3214,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
   if (!EndPoints.empty()) {
     // Recompute the parts of the live range we had to remove because of
     // CR_Replace conflicts.
-    DEBUG({
+    LLVM_DEBUG({
       dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
       for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
         dbgs() << EndPoints[i];
@@ -3220,7 +3362,8 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
       continue;
     // Check that OtherReg interferes with DstReg.
     if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
-      DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg) << '\n');
+      LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
+                        << '\n');
       return true;
     }
   }
@@ -3229,7 +3372,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
 void
 RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
-  DEBUG(dbgs() << MBB->getName() << ":\n");
+  LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");

   // Collect all copy-like instructions in MBB. Don't start coalescing anything
   // yet, it might invalidate the iterator.
@@ -3294,7 +3437,7 @@ void RegisterCoalescer::coalesceLocals() {
 }

 void RegisterCoalescer::joinAllIntervals() {
-  DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+  LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
   assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");

   std::vector<MBBPriorityInfo> MBBs;
@@ -3350,8 +3493,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
   // splitting optimization.
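
The change that dominates these hunks is mechanical: every DEBUG(...) becomes LLVM_DEBUG(...). The shape of the macro is roughly the following simplified sketch (not the verbatim definition from llvm/Support/Debug.h): the wrapped statement disappears entirely in release builds, and in asserts builds it runs only when -debug (optionally narrowed by -debug-only=DEBUG_TYPE) is active, which is why whole for-loops can appear inside it.

// Simplified sketch of the LLVM_DEBUG shape; not the verbatim definition
// from llvm/Support/Debug.h.
#include <cstdio>

static bool DebugFlag = true;                  // set by -debug in real LLVM
static bool isCurrentDebugType(const char *) { // matched against -debug-only
  return true;
}

#define DEBUG_TYPE "regalloc"

#ifndef NDEBUG
#define LLVM_DEBUG(X)                                                          \
  do {                                                                         \
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {                         \
      X;                                                                       \
    }                                                                          \
  } while (false)
#else
// The statement vanishes entirely in release builds.
#define LLVM_DEBUG(X)                                                          \
  do {                                                                         \
  } while (false)
#endif

int main() {
  // Arbitrary statements are allowed, including the loops used in the
  // hunks above.
  LLVM_DEBUG(for (int r = 0; r < 3; ++r) std::printf("reg %d\n", r));
}
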
JoinSplitEdges = EnableJoinSplits; -  DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" -               << "********** Function: " << MF->getName() << '\n'); +  LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" +                    << "********** Function: " << MF->getName() << '\n');    if (VerifyCoalescing)      MF->verify(this, "Before register coalescing"); @@ -3368,14 +3511,15 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {    array_pod_sort(InflateRegs.begin(), InflateRegs.end());    InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),                      InflateRegs.end()); -  DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); +  LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() +                    << " regs.\n");    for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {      unsigned Reg = InflateRegs[i];      if (MRI->reg_nodbg_empty(Reg))        continue;      if (MRI->recomputeRegClass(Reg)) { -      DEBUG(dbgs() << printReg(Reg) << " inflated to " -                   << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); +      LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to " +                        << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');        ++NumInflated;        LiveInterval &LI = LIS->getInterval(Reg); @@ -3398,7 +3542,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {      }    } -  DEBUG(dump()); +  LLVM_DEBUG(dump());    if (VerifyCoalescing)      MF->verify(this, "After register coalescing");    return true; diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index 9ac810c7c723..51414de518fd 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -28,6 +28,7 @@  #include "llvm/CodeGen/SlotIndexes.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/MC/LaneBitmask.h"  #include "llvm/MC/MCRegisterInfo.h"  #include "llvm/Support/Compiler.h" @@ -587,7 +588,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,    for (auto I = Defs.begin(); I != Defs.end(); ) {      LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit,                                             Pos.getDeadSlot()); -    // If the the def is all that is live after the instruction, then in case +    // If the def is all that is live after the instruction, then in case      // of a subregister def we need a read-undef flag.      unsigned RegUnit = I->RegUnit;      if (TargetRegisterInfo::isVirtualRegister(RegUnit) && @@ -635,7 +636,7 @@ void PressureDiffs::init(unsigned N) {    }    Max = Size;    free(PDiffArray); -  PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff))); +  PDiffArray = static_cast<PressureDiff*>(safe_calloc(N, sizeof(PressureDiff)));  }  void PressureDiffs::addInstruction(unsigned Idx, @@ -747,7 +748,7 @@ void RegPressureTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {  /// instruction independent of liveness.  void RegPressureTracker::recede(const RegisterOperands &RegOpers,                                  SmallVectorImpl<RegisterMaskPair> *LiveUses) { -  assert(!CurrPos->isDebugValue()); +  assert(!CurrPos->isDebugInstr());    // Boost pressure for all dead defs together.    
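
One non-mechanical change sits in PressureDiffs::init above: the bare calloc becomes safe_calloc, and the reinterpret_cast becomes a static_cast (which suffices when converting from void*). Below is a sketch of the idea, under the assumption that safe_calloc's job is simply to fail fast on allocation failure, as LLVM's checked allocation wrappers do; checked_calloc is an invented name, not the LLVM function.

// Sketch of an overflow/failure-checked calloc in the spirit of
// llvm::safe_calloc; real LLVM reports a fatal error rather than calling
// abort() directly.
#include <cstdio>
#include <cstdlib>

static void *checked_calloc(std::size_t Count, std::size_t Size) {
  // calloc itself must detect Count * Size overflow and return null.
  void *P = std::calloc(Count, Size);
  if (P == nullptr && Count != 0 && Size != 0) {
    std::fprintf(stderr, "allocation failed\n");
    std::abort(); // fail fast instead of handing out a null pointer
  }
  return P;
}

int main() {
  struct PressureDiffLike { int Data[8]; };
  // Zero-initialized array, as PressureDiffs::init relies on.
  auto *Arr = static_cast<PressureDiffLike *>(
      checked_calloc(16, sizeof(PressureDiffLike)));
  std::printf("%d\n", Arr->Data[0]); // prints 0
  std::free(Arr);
}
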
bumpDeadDefs(RegOpers.DeadDefs); @@ -1018,7 +1019,7 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,  /// This is intended for speculative queries. It leaves pressure inconsistent  /// with the current position, so must be restored by the caller.  void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { -  assert(!MI->isDebugValue() && "Expect a nondebug instruction."); +  assert(!MI->isDebugInstr() && "Expect a nondebug instruction.");    SlotIndex SlotIdx;    if (RequireIntervals) @@ -1259,7 +1260,7 @@ LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit,  /// This is intended for speculative queries. It leaves pressure inconsistent  /// with the current position, so must be restored by the caller.  void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { -  assert(!MI->isDebugValue() && "Expect a nondebug instruction."); +  assert(!MI->isDebugInstr() && "Expect a nondebug instruction.");    SlotIndex SlotIdx;    if (RequireIntervals) diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index 97967124add6..a878c34f9aa4 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -111,7 +111,7 @@ void RegScavenger::determineKillsAndDefs() {    assert(Tracking && "Must be tracking to determine kills and defs");    MachineInstr &MI = *MBBI; -  assert(!MI.isDebugValue() && "Debug values have no kills or defs"); +  assert(!MI.isDebugInstr() && "Debug values have no kills or defs");    // Find out which registers are early clobbered, killed, defined, and marked    // def-dead in this instruction. @@ -158,12 +158,12 @@ void RegScavenger::unprocess() {    assert(Tracking && "Cannot unprocess because we're not tracking");    MachineInstr &MI = *MBBI; -  if (!MI.isDebugValue()) { +  if (!MI.isDebugInstr()) {      determineKillsAndDefs();      // Commit the changes. -    setUsed(KillRegUnits);      setUnused(DefRegUnits); +    setUsed(KillRegUnits);    }    if (MBBI == MBB->begin()) { @@ -195,7 +195,7 @@ void RegScavenger::forward() {      I->Restore = nullptr;    } -  if (MI.isDebugValue()) +  if (MI.isDebugInstr())      return;    determineKillsAndDefs(); @@ -288,8 +288,8 @@ bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {  unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {    for (unsigned Reg : *RC) {      if (!isRegUsed(Reg)) { -      DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI) -                   << "\n"); +      LLVM_DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI) +                        << "\n");        return Reg;      }    } @@ -318,7 +318,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,    bool inVirtLiveRange = false;    for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) { -    if (MI->isDebugValue()) { +    if (MI->isDebugInstr()) {        ++InstrLimit; // Don't count debug instructions        continue;      } @@ -561,15 +561,15 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,    // If we found an unused register there is no reason to spill it.    
 if (!isRegUsed(SReg)) {
-    DEBUG(dbgs() << "Scavenged register: " << printReg(SReg, TRI) << "\n");
+    LLVM_DEBUG(dbgs() << "Scavenged register: " << printReg(SReg, TRI) << "\n");
     return SReg;
   }

   ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
   Scavenged.Restore = &*std::prev(UseMI);
-  DEBUG(dbgs() << "Scavenged register (with spill): " << printReg(SReg, TRI)
-               << "\n");
+  LLVM_DEBUG(dbgs() << "Scavenged register (with spill): "
+                    << printReg(SReg, TRI) << "\n");
   return SReg;
 }
@@ -594,14 +594,15 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
     MachineBasicBlock::iterator ReloadAfter =
       RestoreAfter ? std::next(MBBI) : MBBI;
     MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
-    DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
+    LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
     ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
     Scavenged.Restore = &*std::prev(SpillBefore);
     LiveUnits.removeReg(Reg);
-    DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
-                 << " until " << *SpillBefore);
+    LLVM_DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
+                      << " until " << *SpillBefore);
   } else {
-    DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI) << '\n');
+    LLVM_DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI)
+                      << '\n');
   }
   return Reg;
 }
@@ -757,8 +758,8 @@ void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
     bool Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
     if (Again) {
-      DEBUG(dbgs() << "Warning: Required two scavenging passes for block "
-            << MBB.getName() << '\n');
+      LLVM_DEBUG(dbgs() << "Warning: Required two scavenging passes for block "
+                        << MBB.getName() << '\n');
       Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB);
       // The target required a 2nd run (because it created new vregs while
       // spilling). Refuse to do another pass to keep compile time in check.
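
Easy to miss in the RegScavenger hunks above: unprocess() now applies setUnused(DefRegUnits) before setUsed(KillRegUnits) when stepping backwards over an instruction, presumably so that a register the instruction both kills and redefines ends up live before the instruction. A toy illustration of why the order matters (plain bitsets, not the RegScavenger API):

// Toy illustration of undoing an instruction's liveness effect in reverse.
#include <bitset>
#include <cassert>

int main() {
  std::bitset<8> KillRegUnits, DefRegUnits;
  KillRegUnits.set(3); // the instruction reads (kills) reg 3 ...
  DefRegUnits.set(3);  // ... and also redefines it

  // Liveness *after* the instruction: the def leaves reg 3 live.
  std::bitset<8> Used;
  Used.set(3);

  // New order: clear defs first, then re-add kills. Reg 3 correctly ends
  // up live before the instruction, since the instruction reads it.
  Used &= ~DefRegUnits; // setUnused(DefRegUnits)
  Used |= KillRegUnits; // setUsed(KillRegUnits)
  assert(Used.test(3));

  // Old order: add kills first, then clear defs. Reg 3 is wrongly dead.
  std::bitset<8> Wrong;
  Wrong.set(3);
  Wrong |= KillRegUnits; // setUsed(KillRegUnits)
  Wrong &= ~DefRegUnits; // setUnused(DefRegUnits)
  assert(!Wrong.test(3));
  return 0;
}
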
diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
index 4e42deb406e1..6a31118cc562 100644
--- a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -31,8 +31,6 @@
 using namespace llvm;

-#define DEBUG_TYPE "ip-regalloc"
-
 static cl::opt<bool> DumpRegUsage(
     "print-regusage", cl::init(false), cl::Hidden,
     cl::desc("print register usage details collected for analysis."));
@@ -42,7 +40,9 @@ INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",

 char PhysicalRegisterUsageInfo::ID = 0;

-void PhysicalRegisterUsageInfo::anchor() {}
+void PhysicalRegisterUsageInfo::setTargetMachine(const TargetMachine &TM) {
+  this->TM = &TM;
+}

 bool PhysicalRegisterUsageInfo::doInitialization(Module &M) {
   RegMasks.grow(M.size());
@@ -58,22 +58,19 @@ bool PhysicalRegisterUsageInfo::doFinalization(Module &M) {
 }

 void PhysicalRegisterUsageInfo::storeUpdateRegUsageInfo(
-    const Function *FP, std::vector<uint32_t> RegMask) {
-  assert(FP != nullptr && "Function * can't be nullptr.");
-  RegMasks[FP] = std::move(RegMask);
+    const Function &FP, ArrayRef<uint32_t> RegMask) {
+  RegMasks[&FP] = RegMask;
 }

-const std::vector<uint32_t> *
-PhysicalRegisterUsageInfo::getRegUsageInfo(const Function *FP) {
-  auto It = RegMasks.find(FP);
+ArrayRef<uint32_t>
+PhysicalRegisterUsageInfo::getRegUsageInfo(const Function &FP) {
+  auto It = RegMasks.find(&FP);
   if (It != RegMasks.end())
-    return &(It->second);
-  return nullptr;
+    return makeArrayRef<uint32_t>(It->second);
+  return ArrayRef<uint32_t>();
 }

 void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
-  const TargetRegisterInfo *TRI;
-
   using FuncPtrRegMaskPair = std::pair<const Function *, std::vector<uint32_t>>;

   SmallVector<const FuncPtrRegMaskPair *, 64> FPRMPairVector;
@@ -83,7 +80,7 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
     FPRMPairVector.push_back(&RegMask);

   // Sort the vector to print the analysis in alphabetical order of function name.
-  std::sort(
+  llvm::sort(
       FPRMPairVector.begin(), FPRMPairVector.end(),
       [](const FuncPtrRegMaskPair *A, const FuncPtrRegMaskPair *B) -> bool {
         return A->first->getName() < B->first->getName();
@@ -92,8 +89,9 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
   for (const FuncPtrRegMaskPair *FPRMPair : FPRMPairVector) {
     OS << FPRMPair->first->getName() << " "
        << "Clobbered Registers: ";
-    TRI = TM->getSubtarget<TargetSubtargetInfo>(*(FPRMPair->first))
-              .getRegisterInfo();
+    const TargetRegisterInfo *TRI
+        = TM->getSubtarget<TargetSubtargetInfo>(*(FPRMPair->first))
+          .getRegisterInfo();

     for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
       if (MachineOperand::clobbersPhysReg(&(FPRMPair->second[0]), PReg))
diff --git a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index 1e1f36a35ecc..156d1c81c238 100644
--- a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -77,20 +77,20 @@ private:
   /// Split unrelated subregister components and rename them to new vregs.
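
The std::sort to llvm::sort switch in RegisterUsageInfo::print above recurs throughout this change. As I understand it, llvm::sort forwards to std::sort but may shuffle the range first in expensive-checks builds, so comparators that leave equivalent elements in input-dependent order surface as non-determinism instead of hiding. A minimal sketch of that idea (checked_sort is an invented name, not the upstream implementation):

// Sketch of an llvm::sort-like wrapper.
#include <algorithm>
#include <cstdio>
#include <random>
#include <utility>
#include <vector>

template <typename It, typename Cmp>
void checked_sort(It First, It Last, Cmp C) {
#ifdef EXPENSIVE_CHECKS
  // Shuffle first so any input-dependent order among "equal" elements is
  // exposed as non-determinism instead of silently slipping through.
  std::mt19937 Gen(0x5eed);
  std::shuffle(First, Last, Gen);
#endif
  std::sort(First, Last, C);
}

int main() {
  std::vector<std::pair<int, char>> V = {{1, 'a'}, {0, 'b'}, {1, 'c'}};
  // The comparator only looks at .first, so {1,'a'} and {1,'c'} compare
  // equal and their relative order is unspecified under std::sort.
  checked_sort(V.begin(), V.end(),
               [](const std::pair<int, char> &A,
                  const std::pair<int, char> &B) { return A.first < B.first; });
  for (const auto &P : V)
    std::printf("(%d,%c) ", P.first, P.second);
  std::printf("\n");
}
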
 bool renameComponents(LiveInterval &LI) const;

-  /// \brief Build a vector of SubRange infos and a union find set of
+  /// Build a vector of SubRange infos and a union find set of
   /// equivalence classes.
   /// Returns true if more than 1 equivalence class was found.
   bool findComponents(IntEqClasses &Classes,
                       SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
                       LiveInterval &LI) const;

-  /// \brief Distribute the LiveInterval segments into the new LiveIntervals
+  /// Distribute the LiveInterval segments into the new LiveIntervals
   /// belonging to their class.
   void distribute(const IntEqClasses &Classes,
                   const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
                   const SmallVectorImpl<LiveInterval*> &Intervals) const;

-  /// \brief Constructs main liverange and add missing undef+dead flags.
+  /// Constructs the main live range and adds missing undef+dead flags.
   void computeMainRangesFixFlags(const IntEqClasses &Classes,
       const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
       const SmallVectorImpl<LiveInterval*> &Intervals) const;
@@ -134,17 +134,17 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
   const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
   SmallVector<LiveInterval*, 4> Intervals;
   Intervals.push_back(&LI);
-  DEBUG(dbgs() << printReg(Reg) << ": Found " << Classes.getNumClasses()
-        << " equivalence classes.\n");
-  DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:");
+  LLVM_DEBUG(dbgs() << printReg(Reg) << ": Found " << Classes.getNumClasses()
+                    << " equivalence classes.\n");
+  LLVM_DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:");
   for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses;
        ++I) {
     unsigned NewVReg = MRI->createVirtualRegister(RegClass);
     LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg);
     Intervals.push_back(&NewLI);
-    DEBUG(dbgs() << ' ' << printReg(NewVReg));
+    LLVM_DEBUG(dbgs() << ' ' << printReg(NewVReg));
   }
-  DEBUG(dbgs() << '\n');
+  LLVM_DEBUG(dbgs() << '\n');

   rewriteOperands(Classes, SubRangeInfos, Intervals);
   distribute(Classes, SubRangeInfos, Intervals);
@@ -219,7 +219,8 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
     if (!MO.isDef() && !MO.readsReg())
       continue;

-    SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+    auto *MI = MO.getParent();
+    SlotIndex Pos = LIS->getInstructionIndex(*MI);
     Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber())
                      : Pos.getBaseIndex();
     unsigned SubRegIdx = MO.getSubReg();
@@ -245,11 +246,14 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
     MO.setReg(VReg);

     if (MO.isTied() && Reg != VReg) {
-      /// Undef use operands are not tracked in the equivalence class but need
-      /// to be update if they are tied.
-      MO.getParent()->substituteRegister(Reg, VReg, 0, TRI);
-
-      // substituteRegister breaks the iterator, so restart.
+      /// Undef use operands are not tracked in the equivalence class,
+      /// but need to be updated if they are tied; take care to only
+      /// update the tied operand.
+      unsigned OperandNo = MI->getOperandNo(&MO);
+      unsigned TiedIdx = MI->findTiedOperandIdx(OperandNo);
+      MI->getOperand(TiedIdx).setReg(VReg);
+
+      // The above substitution breaks the iterator, so restart.
I = MRI->reg_nodbg_begin(Reg);      }    } @@ -376,8 +380,8 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {    if (!MRI->subRegLivenessEnabled())      return false; -  DEBUG(dbgs() << "Renaming independent subregister live ranges in " -        << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "Renaming independent subregister live ranges in " +                    << MF.getName() << '\n');    LIS = &getAnalysis<LiveIntervals>();    TII = MF.getSubtarget().getInstrInfo(); diff --git a/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp index f1885aa74285..a02302e6ff99 100644 --- a/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp +++ b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -13,9 +13,12 @@  /// happen is that the MachineFunction has the FailedISel property.  //===----------------------------------------------------------------------===// +#include "llvm/ADT/ScopeExit.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackProtector.h"  #include "llvm/CodeGen/Passes.h"  #include "llvm/IR/DiagnosticInfo.h"  #include "llvm/Support/Debug.h" @@ -42,12 +45,23 @@ namespace {      StringRef getPassName() const override { return "ResetMachineFunction"; } +    void getAnalysisUsage(AnalysisUsage &AU) const override { +      AU.addPreserved<StackProtector>(); +      MachineFunctionPass::getAnalysisUsage(AU); +    } +      bool runOnMachineFunction(MachineFunction &MF) override { +      // No matter what happened, whether we successfully selected the function +      // or not, nothing is going to use the vreg types after us. Make sure they +      // disappear. 
+      auto ClearVRegTypesOnReturn =
+          make_scope_exit([&MF]() { MF.getRegInfo().clearVirtRegTypes(); });
+
       if (MF.getProperties().hasProperty(
             MachineFunctionProperties::Property::FailedISel)) {
         if (AbortOnFailedISel)
           report_fatal_error("Instruction selection failed");
-        DEBUG(dbgs() << "Reseting: " << MF.getName() << '\n');
+        LLVM_DEBUG(dbgs() << "Resetting: " << MF.getName() << '\n');
         ++NumFunctionsReset;
         MF.reset();
         if (EmitFallbackDiag) {
@@ -65,7 +79,7 @@ namespace {
 char ResetMachineFunction::ID = 0;
 INITIALIZE_PASS(ResetMachineFunction, DEBUG_TYPE,
-                "reset machine function if ISel failed", false, false)
+                "Reset machine function if ISel failed", false, false)

 MachineFunctionPass *
 llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false,
diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp
index 51233be521be..cbbbf7c385aa 100644
--- a/contrib/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp
@@ -24,10 +24,12 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -61,7 +63,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -88,6 +90,13 @@ STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads");

 } // namespace llvm

+/// Use __safestack_pointer_address even if the platform has a faster way of
+/// accessing the safe stack pointer.
+static cl::opt<bool>
+    SafeStackUsePointerAddress("safestack-use-pointer-address",
+                                  cl::init(false), cl::Hidden);
+
+
 namespace {

 /// Rewrite an SCEV expression for a memory access address to an expression that
@@ -134,14 +143,14 @@ class SafeStack {
   /// might expect to appear on the stack on most common targets.
   enum { StackAlignment = 16 };

-  /// \brief Return the value of the stack canary.
+  /// Return the value of the stack canary.
   Value *getStackGuard(IRBuilder<> &IRB, Function &F);

-  /// \brief Load stack guard from the frame and check if it has changed.
+  /// Load stack guard from the frame and check if it has changed.
   void checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
                        AllocaInst *StackGuardSlot, Value *StackGuard);

-  /// \brief Find all static allocas, dynamic allocas, return instructions and
+  /// Find all static allocas, dynamic allocas, return instructions and
   /// stack restore points (exception unwind blocks and setjmp calls) in the
   /// given function and append them to the respective vectors.
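
The make_scope_exit guard added above (from llvm/ADT/ScopeExit.h) is a small RAII utility: the lambda runs when the guard goes out of scope, on every exit path, which is exactly what clearing the vreg types "no matter what happened" requires. A self-contained sketch of the pattern in generic C++, not the LLVM header itself:

// Minimal scope-exit guard in the spirit of llvm::make_scope_exit.
#include <cstdio>
#include <utility>

template <typename Callable> class ScopeExit {
  Callable ExitFn;
  bool Active = true;

public:
  explicit ScopeExit(Callable F) : ExitFn(std::move(F)) {}
  ScopeExit(ScopeExit &&Other)
      : ExitFn(std::move(Other.ExitFn)), Active(Other.Active) {
    Other.Active = false; // a moved-from guard must not fire
  }
  ScopeExit(const ScopeExit &) = delete;
  ScopeExit &operator=(const ScopeExit &) = delete;
  ~ScopeExit() {
    if (Active)
      ExitFn(); // runs on every path out of the enclosing scope
  }
};

template <typename Callable> ScopeExit<Callable> makeScopeExit(Callable F) {
  return ScopeExit<Callable>(std::move(F));
}

int main(int argc, char **) {
  auto Cleanup = makeScopeExit([] { std::puts("cleanup ran"); });
  if (argc > 1)
    return 1; // early return: the cleanup still runs
  std::puts("normal path");
  return 0;
}
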
void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas, @@ -150,11 +159,11 @@ class SafeStack {                   SmallVectorImpl<ReturnInst *> &Returns,                   SmallVectorImpl<Instruction *> &StackRestorePoints); -  /// \brief Calculate the allocation size of a given alloca. Returns 0 if the +  /// Calculate the allocation size of a given alloca. Returns 0 if the    /// size can not be statically determined.    uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI); -  /// \brief Allocate space for all static allocas in \p StaticAllocas, +  /// Allocate space for all static allocas in \p StaticAllocas,    /// replace allocas with pointers into the unsafe stack and generate code to    /// restore the stack pointer before all return instructions in \p Returns.    /// @@ -167,7 +176,7 @@ class SafeStack {                                          Instruction *BasePointer,                                          AllocaInst *StackGuardSlot); -  /// \brief Generate code to restore the stack after all stack restore points +  /// Generate code to restore the stack after all stack restore points    /// in \p StackRestorePoints.    ///    /// \returns A local variable in which to maintain the dynamic top of the @@ -177,7 +186,7 @@ class SafeStack {                             ArrayRef<Instruction *> StackRestorePoints,                             Value *StaticTop, bool NeedDynamicTop); -  /// \brief Replace all allocas in \p DynamicAllocas with code to allocate +  /// Replace all allocas in \p DynamicAllocas with code to allocate    /// space dynamically on the unsafe stack and store the dynamic unsafe stack    /// top to \p DynamicTop if non-null.    void moveDynamicAllocasToUnsafeStack(Function &F, Value *UnsafeStackPtr, @@ -191,6 +200,9 @@ class SafeStack {    bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr,                      uint64_t AllocaSize); +  bool ShouldInlinePointerAddress(CallSite &CS); +  void TryInlinePointerAddress(); +  public:    SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL,              ScalarEvolution &SE) @@ -230,16 +242,17 @@ bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,        ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize));    bool Safe = AllocaRange.contains(AccessRange); -  DEBUG(dbgs() << "[SafeStack] " -               << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ") -               << *AllocaPtr << "\n" -               << "            Access " << *Addr << "\n" -               << "            SCEV " << *Expr -               << " U: " << SE.getUnsignedRange(Expr) -               << ", S: " << SE.getSignedRange(Expr) << "\n" -               << "            Range " << AccessRange << "\n" -               << "            AllocaRange " << AllocaRange << "\n" -               << "            " << (Safe ? "safe" : "unsafe") << "\n"); +  LLVM_DEBUG( +      dbgs() << "[SafeStack] " +             << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ") +             << *AllocaPtr << "\n" +             << "            Access " << *Addr << "\n" +             << "            SCEV " << *Expr +             << " U: " << SE.getUnsignedRange(Expr) +             << ", S: " << SE.getSignedRange(Expr) << "\n" +             << "            Range " << AccessRange << "\n" +             << "            AllocaRange " << AllocaRange << "\n" +             << "            " << (Safe ? 
"safe" : "unsafe") << "\n");    return Safe;  } @@ -286,8 +299,9 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {        case Instruction::Store:          if (V == I->getOperand(0)) {            // Stored the pointer - conservatively assume it may be unsafe. -          DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr -                       << "\n            store of address: " << *I << "\n"); +          LLVM_DEBUG(dbgs() +                     << "[SafeStack] Unsafe alloca: " << *AllocaPtr +                     << "\n            store of address: " << *I << "\n");            return false;          } @@ -312,9 +326,9 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {          if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {            if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) { -            DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr -                         << "\n            unsafe memintrinsic: " << *I -                         << "\n"); +            LLVM_DEBUG(dbgs() +                       << "[SafeStack] Unsafe alloca: " << *AllocaPtr +                       << "\n            unsafe memintrinsic: " << *I << "\n");              return false;            }            continue; @@ -332,8 +346,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {            if (A->get() == V)              if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) ||                                                 CS.doesNotAccessMemory()))) { -              DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr -                           << "\n            unsafe call: " << *I << "\n"); +              LLVM_DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr +                                << "\n            unsafe call: " << *I << "\n");                return false;              }          continue; @@ -545,6 +559,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(    for (Argument *Arg : ByValArguments) {      unsigned Offset = SSL.getObjectOffset(Arg); +    unsigned Align = SSL.getObjectAlignment(Arg);      Type *Ty = Arg->getType()->getPointerElementType();      uint64_t Size = DL.getTypeStoreSize(Ty); @@ -561,7 +576,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(                        DIExpression::NoDeref, -Offset, DIExpression::NoDeref);      Arg->replaceAllUsesWith(NewArg);      IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode()); -    IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment()); +    IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlignment(), Size);    }    // Allocate space for every unsafe static AllocaInst on the unsafe stack. 
@@ -695,6 +710,35 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(    }  } +bool SafeStack::ShouldInlinePointerAddress(CallSite &CS) { +  Function *Callee = CS.getCalledFunction(); +  if (CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee)) +    return true; +  if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) || +      CS.isNoInline()) +    return false; +  return true; +} + +void SafeStack::TryInlinePointerAddress() { +  if (!isa<CallInst>(UnsafeStackPtr)) +    return; + +  if(F.hasFnAttribute(Attribute::OptimizeNone)) +    return; + +  CallSite CS(UnsafeStackPtr); +  Function *Callee = CS.getCalledFunction(); +  if (!Callee || Callee->isDeclaration()) +    return; + +  if (!ShouldInlinePointerAddress(CS)) +    return; + +  InlineFunctionInfo IFI; +  InlineFunction(CS, IFI); +} +  bool SafeStack::run() {    assert(F.hasFnAttribute(Attribute::SafeStack) &&           "Can't run SafeStack on a function without the attribute"); @@ -731,7 +775,13 @@ bool SafeStack::run() {      ++NumUnsafeStackRestorePointsFunctions;    IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt()); -  UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB); +  if (SafeStackUsePointerAddress) { +    Value *Fn = F.getParent()->getOrInsertFunction( +        "__safestack_pointer_address", StackPtrTy->getPointerTo(0)); +    UnsafeStackPtr = IRB.CreateCall(Fn); +  } else { +    UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB); +  }    // Load the current stack pointer (we'll also use it as a base pointer).    // FIXME: use a dedicated register for it ? @@ -779,7 +829,9 @@ bool SafeStack::run() {      IRB.CreateStore(BasePointer, UnsafeStackPtr);    } -  DEBUG(dbgs() << "[SafeStack]     safestack applied\n"); +  TryInlinePointerAddress(); + +  LLVM_DEBUG(dbgs() << "[SafeStack]     safestack applied\n");    return true;  } @@ -800,17 +852,17 @@ public:    }    bool runOnFunction(Function &F) override { -    DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n"); +    LLVM_DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");      if (!F.hasFnAttribute(Attribute::SafeStack)) { -      DEBUG(dbgs() << "[SafeStack]     safestack is not requested" -                      " for this function\n"); +      LLVM_DEBUG(dbgs() << "[SafeStack]     safestack is not requested" +                           " for this function\n");        return false;      }      if (F.isDeclaration()) { -      DEBUG(dbgs() << "[SafeStack]     function definition" -                      " is not available\n"); +      LLVM_DEBUG(dbgs() << "[SafeStack]     function definition" +                           " is not available\n");        return false;      } diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp index 072e6e090e1e..329458778a98 100644 --- a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp @@ -12,6 +12,7 @@  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/DepthFirstIterator.h"  #include "llvm/ADT/SmallVector.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/CFG.h"  #include "llvm/IR/Instruction.h" @@ -101,10 +102,10 @@ void StackColoring::collectMarkers() {    // For each basic block, compute    // * the list of markers in the instruction order    // * the sets of allocas whose lifetime starts or ends in this BB -  DEBUG(dbgs() << "Instructions:\n"); +  LLVM_DEBUG(dbgs() << "Instructions:\n");    unsigned InstNo = 0;    for 
(BasicBlock *BB : depth_first(&F)) { -    DEBUG(dbgs() << "  " << InstNo << ": BB " << BB->getName() << "\n"); +    LLVM_DEBUG(dbgs() << "  " << InstNo << ": BB " << BB->getName() << "\n");      unsigned BBStart = InstNo++;      BlockLifetimeInfo &BlockInfo = BlockLiveness[BB]; @@ -121,9 +122,9 @@ void StackColoring::collectMarkers() {      }      auto ProcessMarker = [&](Instruction *I, const Marker &M) { -      DEBUG(dbgs() << "  " << InstNo << ":  " -                   << (M.IsStart ? "start " : "end   ") << M.AllocaNo << ", " -                   << *I << "\n"); +      LLVM_DEBUG(dbgs() << "  " << InstNo << ":  " +                        << (M.IsStart ? "start " : "end   ") << M.AllocaNo +                        << ", " << *I << "\n");        BBMarkers[BB].push_back({InstNo, M}); @@ -280,7 +281,7 @@ LLVM_DUMP_METHOD void StackColoring::dumpLiveRanges() {  #endif  void StackColoring::run() { -  DEBUG(dumpAllocas()); +  LLVM_DEBUG(dumpAllocas());    for (unsigned I = 0; I < NumAllocas; ++I)      AllocaNumbering[Allocas[I]] = I; @@ -303,7 +304,7 @@ void StackColoring::run() {        LiveRanges[I] = getFullLiveRange();    calculateLocalLiveness(); -  DEBUG(dumpBlockLiveness()); +  LLVM_DEBUG(dumpBlockLiveness());    calculateLiveIntervals(); -  DEBUG(dumpLiveRanges()); +  LLVM_DEBUG(dumpLiveRanges());  } diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp index b1759359e46f..07b6a5d1883b 100644 --- a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp @@ -42,6 +42,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) {  void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment,                              const StackColoring::LiveRange &Range) {    StackObjects.push_back({V, Size, Alignment, Range}); +  ObjectAlignments[V] = Alignment;    MaxAlignment = std::max(MaxAlignment, Alignment);  } @@ -62,30 +63,30 @@ void StackLayout::layoutObject(StackObject &Obj) {      return;    } -  DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align " << Obj.Alignment -               << ", range " << Obj.Range << "\n"); +  LLVM_DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align " +                    << Obj.Alignment << ", range " << Obj.Range << "\n");    assert(Obj.Alignment <= MaxAlignment);    unsigned Start = AdjustStackOffset(0, Obj.Size, Obj.Alignment);    unsigned End = Start + Obj.Size; -  DEBUG(dbgs() << "  First candidate: " << Start << " .. " << End << "\n"); +  LLVM_DEBUG(dbgs() << "  First candidate: " << Start << " .. " << End << "\n");    for (const StackRegion &R : Regions) { -    DEBUG(dbgs() << "  Examining region: " << R.Start << " .. " << R.End -                 << ", range " << R.Range << "\n"); +    LLVM_DEBUG(dbgs() << "  Examining region: " << R.Start << " .. " << R.End +                      << ", range " << R.Range << "\n");      assert(End >= R.Start);      if (Start >= R.End) { -      DEBUG(dbgs() << "  Does not intersect, skip.\n"); +      LLVM_DEBUG(dbgs() << "  Does not intersect, skip.\n");        continue;      }      if (Obj.Range.Overlaps(R.Range)) {        // Find the next appropriate location.        Start = AdjustStackOffset(R.End, Obj.Size, Obj.Alignment);        End = Start + Obj.Size; -      DEBUG(dbgs() << "  Overlaps. Next candidate: " << Start << " .. " << End -                   << "\n"); +      LLVM_DEBUG(dbgs() << "  Overlaps. Next candidate: " << Start << " .. 
" +                        << End << "\n");        continue;      }      if (End <= R.End) { -      DEBUG(dbgs() << "  Reusing region(s).\n"); +      LLVM_DEBUG(dbgs() << "  Reusing region(s).\n");        break;      }    } @@ -94,13 +95,13 @@ void StackLayout::layoutObject(StackObject &Obj) {    if (End > LastRegionEnd) {      // Insert a new region at the end. Maybe two.      if (Start > LastRegionEnd) { -      DEBUG(dbgs() << "  Creating gap region: " << LastRegionEnd << " .. " -                   << Start << "\n"); +      LLVM_DEBUG(dbgs() << "  Creating gap region: " << LastRegionEnd << " .. " +                        << Start << "\n");        Regions.emplace_back(LastRegionEnd, Start, StackColoring::LiveRange());        LastRegionEnd = Start;      } -    DEBUG(dbgs() << "  Creating new region: " << LastRegionEnd << " .. " << End -                 << ", range " << Obj.Range << "\n"); +    LLVM_DEBUG(dbgs() << "  Creating new region: " << LastRegionEnd << " .. " +                      << End << ", range " << Obj.Range << "\n");      Regions.emplace_back(LastRegionEnd, End, Obj.Range);      LastRegionEnd = End;    } @@ -149,5 +150,5 @@ void StackLayout::computeLayout() {    for (auto &Obj : StackObjects)      layoutObject(Obj); -  DEBUG(print(dbgs())); +  LLVM_DEBUG(print(dbgs()));  } diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm/lib/CodeGen/SafeStackLayout.h index 7c1292f251f7..ac531d800f6e 100644 --- a/contrib/llvm/lib/CodeGen/SafeStackLayout.h +++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.h @@ -47,6 +47,7 @@ class StackLayout {    SmallVector<StackObject, 8> StackObjects;    DenseMap<const Value *, unsigned> ObjectOffsets; +  DenseMap<const Value *, unsigned> ObjectAlignments;    void layoutObject(StackObject &Obj); @@ -64,6 +65,9 @@ public:    /// Returns the offset to the object start in the stack frame.    unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; } +  /// Returns the alignment of the object +  unsigned getObjectAlignment(const Value *V) { return ObjectAlignments[V]; } +    /// Returns the size of the entire frame.    unsigned getFrameSize() { return Regions.empty() ? 
0 : Regions.back().End; }
diff --git a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index cef413f9d410..9387722bfebd 100644
--- a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -586,9 +586,6 @@ static void scalarizeMaskedScatter(CallInst *CI) {
 }

 bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
-  if (skipFunction(F))
-    return false;
-
   bool EverMadeChange = false;

   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index 0635e8f41ee7..46064012d9d8 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 9249fa84b38b..d1c5ddabb975 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -38,6 +38,7 @@
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instruction.h"
@@ -118,7 +119,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
   DbgValues.clear();

   const TargetSubtargetInfo &ST = mf.getSubtarget();
-  SchedModel.init(ST.getSchedModel(), &ST, TII);
+  SchedModel.init(&ST);
 }

 /// If this machine instr has memory reference information and it can be
@@ -266,7 +267,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
   }
 }

-/// \brief Adds register dependencies (data, anti, and output) from this SUnit
+/// Adds register dependencies (data, anti, and output) from this SUnit
 /// to following instructions in the same scheduling region that depend on the
 /// physical register referenced at OperIdx.
 void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
@@ -317,13 +318,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
   } else {
     addPhysRegDataDeps(SU, OperIdx);

-    // clear this register's use list
-    if (Uses.contains(Reg))
-      Uses.eraseAll(Reg);
-
-    if (!MO.isDead()) {
-      Defs.eraseAll(Reg);
-    } else if (SU->isCall) {
+    // Clear previous uses and defs of this register and its subregisters.
+    for (MCSubRegIterator SubReg(Reg, TRI, true); SubReg.isValid(); ++SubReg) {
+      if (Uses.contains(*SubReg))
+        Uses.eraseAll(*SubReg);
+      if (!MO.isDead())
+        Defs.eraseAll(*SubReg);
+    }
+    if (MO.isDead() && SU->isCall) {
       // Calls will not be reordered because of chain dependencies (see
       // below).
Since call operands are dead, calls may continue to be added        // to the DefList making dependence checking quadratic in the size of @@ -468,7 +470,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {      CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));  } -/// \brief Adds a register data dependency if the instruction that defines the +/// Adds a register data dependency if the instruction that defines the  /// virtual register used at OperIdx is mapped to an SUnit. Add a register  /// antidependency from this SUnit to instructions that occur later in the same  /// scheduling region if they write the virtual register. @@ -514,7 +516,7 @@ void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,    }  } -/// \brief Creates an SUnit for each real instruction, numbered in top-down +/// Creates an SUnit for each real instruction, numbered in top-down  /// topological order. The instruction order A < B, implies that no edge exists  /// from B to A.  /// @@ -532,7 +534,7 @@ void ScheduleDAGInstrs::initSUnits() {    SUnits.reserve(NumRegionInstrs);    for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) { -    if (MI.isDebugValue()) +    if (MI.isDebugInstr())        continue;      SUnit *SU = newSUnit(&MI); @@ -763,6 +765,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,        DbgMI = &MI;        continue;      } +    if (MI.isDebugLabel()) +      continue; +      SUnit *SU = MISUnitMap[&MI];      assert(SU && "No SUnit mapped to this MI"); @@ -845,8 +850,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,          BarrierChain->addPredBarrier(SU);        BarrierChain = SU; -      DEBUG(dbgs() << "Global memory object and new barrier chain: SU(" -            << BarrierChain->NodeNum << ").\n";); +      LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU(" +                        << BarrierChain->NodeNum << ").\n";);        // Add dependencies against everything below it and clear maps.        addBarrierChain(Stores); @@ -934,11 +939,12 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,      // Reduce maps if they grow huge.      if (Stores.size() + Loads.size() >= HugeRegion) { -      DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";); +      LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);        reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());      }      if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) { -      DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";); +      LLVM_DEBUG( +          dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);        reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());      }    } @@ -978,10 +984,8 @@ void ScheduleDAGInstrs::Value2SUsMap::dump() {  void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,                                                Value2SUsMap &loads, unsigned N) { -  DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; -        stores.dump(); -        dbgs() << "Loading SUnits:\n"; -        loads.dump()); +  LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump(); +             dbgs() << "Loading SUnits:\n"; loads.dump());    // Insert all SU's NodeNums into a vector and sort it.    
std::vector<unsigned> NodeNums; @@ -992,7 +996,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,    for (auto &I : loads)      for (auto *SU : I.second)        NodeNums.push_back(SU->NodeNum); -  std::sort(NodeNums.begin(), NodeNums.end()); +  llvm::sort(NodeNums.begin(), NodeNums.end());    // The N last elements in NodeNums will be removed, and the SU with    // the lowest NodeNum of them will become the new BarrierChain to @@ -1007,12 +1011,12 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,      if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {        BarrierChain->addPredBarrier(newBarrierChain);        BarrierChain = newBarrierChain; -      DEBUG(dbgs() << "Inserting new barrier chain: SU(" -            << BarrierChain->NodeNum << ").\n";); +      LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU(" +                        << BarrierChain->NodeNum << ").\n";);      }      else -      DEBUG(dbgs() << "Keeping old barrier chain: SU(" -            << BarrierChain->NodeNum << ").\n";); +      LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU(" +                        << BarrierChain->NodeNum << ").\n";);    }    else      BarrierChain = newBarrierChain; @@ -1020,10 +1024,8 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,    insertBarrierChain(stores);    insertBarrierChain(loads); -  DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; -        stores.dump(); -        dbgs() << "Loading SUnits:\n"; -        loads.dump()); +  LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump(); +             dbgs() << "Loading SUnits:\n"; loads.dump());  }  static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, @@ -1044,14 +1046,14 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,  }  void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { -  DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n'); +  LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');    LiveRegs.init(*TRI);    LiveRegs.addLiveOuts(MBB);    // Examine block from end to start...    for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { -    if (MI.isDebugValue()) +    if (MI.isDebugInstr())        continue;      // Update liveness.  Registers that are defed but not used in this @@ -1087,7 +1089,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {        while (I->isBundledWithSucc())          ++I;        do { -        if (!I->isDebugValue()) +        if (!I->isDebugInstr())            toggleKills(MRI, LiveRegs, *I, true);          --I;        } while(I != First); @@ -1212,7 +1214,7 @@ public:      RootSet[SU->NodeNum] = RData;    } -  /// \brief Called once for each tree edge after calling visitPostOrderNode on +  /// Called once for each tree edge after calling visitPostOrderNode on    /// the predecessor. Increment the parent node's instruction count and    /// preemptively join this subtree to its parent's if it is small enough.    
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { @@ -1245,11 +1247,11 @@ public:      }      R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());      R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses()); -    DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n"); +    LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");      for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {        R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx]; -      DEBUG(dbgs() << "  SU(" << Idx << ") in tree " -            << R.DFSNodeData[Idx].SubtreeID << '\n'); +      LLVM_DEBUG(dbgs() << "  SU(" << Idx << ") in tree " +                        << R.DFSNodeData[Idx].SubtreeID << '\n');      }      for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {        unsigned PredTree = SubtreeClasses[P.first->NodeNum]; @@ -1404,8 +1406,8 @@ void SchedDFSResult::scheduleTree(unsigned SubtreeID) {    for (const Connection &C : SubtreeConnections[SubtreeID]) {      SubtreeConnectLevels[C.TreeID] =        std::max(SubtreeConnectLevels[C.TreeID], C.Level); -    DEBUG(dbgs() << "  Tree: " << C.TreeID -          << " @" << SubtreeConnectLevels[C.TreeID] << '\n'); +    LLVM_DEBUG(dbgs() << "  Tree: " << C.TreeID << " @" +                      << SubtreeConnectLevels[C.TreeID] << '\n');    }  } diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index 37c4a470bd0a..ff2085aae865 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -61,7 +61,7 @@ namespace llvm {      } -    std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph); +    std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *Graph);      static std::string getNodeAttributes(const SUnit *N,                                           const ScheduleDAG *Graph) {        return "shape=Mrecord"; diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index b789e2d9c52c..b8bfe69a76e1 100644 --- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -16,6 +16,7 @@  #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"  #include "llvm/CodeGen/ScheduleDAG.h"  #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/MC/MCInstrDesc.h"  #include "llvm/MC/MCInstrItineraries.h"  #include "llvm/Support/Compiler.h" @@ -68,12 +69,12 @@ ScoreboardHazardRecognizer::ScoreboardHazardRecognizer(    // If MaxLookAhead is not set above, then we are not enabled.    if (!isEnabled()) -    DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n"); +    LLVM_DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");    else {      // A nonempty itinerary must have a SchedModel.      
IssueWidth = ItinData->SchedModel.IssueWidth; -    DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " -          << ScoreboardDepth << '\n'); +    LLVM_DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " +                      << ScoreboardDepth << '\n');    }  } @@ -155,9 +156,9 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {        }        if (!freeUnits) { -        DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", "); -        DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); -        DEBUG(DAG->dumpNode(SU)); +        LLVM_DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", "); +        LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); +        LLVM_DEBUG(DAG->dumpNode(SU));          return Hazard;        }      } @@ -223,8 +224,8 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {      cycle += IS->getNextCycles();    } -  DEBUG(ReservedScoreboard.dump()); -  DEBUG(RequiredScoreboard.dump()); +  LLVM_DEBUG(ReservedScoreboard.dump()); +  LLVM_DEBUG(RequiredScoreboard.dump());  }  void ScoreboardHazardRecognizer::AdvanceCycle() { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 03cb2e310c7e..7a99687757f8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -36,7 +36,6 @@  #include "llvm/CodeGen/MachineFrameInfo.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/RuntimeLibcalls.h"  #include "llvm/CodeGen/SelectionDAG.h"  #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" @@ -60,6 +59,7 @@  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Target/TargetMachine.h" @@ -122,7 +122,7 @@ namespace {      bool LegalTypes = false;      bool ForCodeSize; -    /// \brief Worklist of all of the nodes that need to be simplified. +    /// Worklist of all of the nodes that need to be simplified.      ///      /// This must behave as a stack -- new nodes to process are pushed onto the      /// back and when processing we pop off of the back. @@ -131,14 +131,14 @@ namespace {      /// due to nodes being deleted from the underlying DAG.      SmallVector<SDNode *, 64> Worklist; -    /// \brief Mapping from an SDNode to its position on the worklist. +    /// Mapping from an SDNode to its position on the worklist.      ///      /// This is used to find and remove nodes from the worklist (by nulling      /// them) when they are deleted from the underlying DAG. It relies on      /// stable indices of nodes within the worklist.      DenseMap<SDNode *, unsigned> WorklistMap; -    /// \brief Set of nodes which have been combined (at least once). +    /// Set of nodes which have been combined (at least once).      ///      /// This is used to allow us to reliably add any operands of a DAG node      /// which have not yet been combined to the worklist. @@ -232,14 +232,25 @@ namespace {        return SimplifyDemandedBits(Op, Demanded);      } +    /// Check the specified vector node value to see if it can be simplified or +    /// if things it uses can be simplified as it only uses some of the +    /// elements. If so, return true. 
+    bool SimplifyDemandedVectorElts(SDValue Op) { +      unsigned NumElts = Op.getValueType().getVectorNumElements(); +      APInt Demanded = APInt::getAllOnesValue(NumElts); +      return SimplifyDemandedVectorElts(Op, Demanded); +    } +      bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); +    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded, +                                    bool AssumeSingleUse = false);      bool CombineToPreIndexedLoadStore(SDNode *N);      bool CombineToPostIndexedLoadStore(SDNode *N);      SDValue SplitIndexingFromLoad(LoadSDNode *LD);      bool SliceUpLoad(SDNode *N); -    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed +    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed      ///   load.      ///      /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. @@ -258,10 +269,6 @@ namespace {      SDValue PromoteExtend(SDValue Op);      bool PromoteLoad(SDValue Op); -    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc, -                         SDValue ExtLoad, const SDLoc &DL, -                         ISD::NodeType ExtType); -      /// Call the node-specific routine that knows how to fold each      /// particular type of node. If that doesn't do anything, try the      /// target-specific DAG combines. @@ -292,7 +299,9 @@ namespace {      SDValue visitMUL(SDNode *N);      SDValue useDivRem(SDNode *N);      SDValue visitSDIV(SDNode *N); +    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);      SDValue visitUDIV(SDNode *N); +    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);      SDValue visitREM(SDNode *N);      SDValue visitMULHU(SDNode *N);      SDValue visitMULHS(SDNode *N); @@ -302,9 +311,9 @@ namespace {      SDValue visitUMULO(SDNode *N);      SDValue visitIMINMAX(SDNode *N);      SDValue visitAND(SDNode *N); -    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); +    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);      SDValue visitOR(SDNode *N); -    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); +    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);      SDValue visitXOR(SDNode *N);      SDValue SimplifyVBinOp(SDNode *N);      SDValue visitSHL(SDNode *N); @@ -323,7 +332,6 @@ namespace {      SDValue visitVSELECT(SDNode *N);      SDValue visitSELECT_CC(SDNode *N);      SDValue visitSETCC(SDNode *N); -    SDValue visitSETCCE(SDNode *N);      SDValue visitSETCCCARRY(SDNode *N);      SDValue visitSIGN_EXTEND(SDNode *N);      SDValue visitZERO_EXTEND(SDNode *N); @@ -385,8 +393,8 @@ namespace {      SDValue visitFMULForFMADistributiveCombine(SDNode *N);      SDValue XformToShuffleWithZero(SDNode *N); -    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS, -                           SDValue RHS); +    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, +                           SDValue N1);      SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); @@ -403,8 +411,11 @@ namespace {                                     SDValue N2, SDValue N3, ISD::CondCode CC);      SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,                                const SDLoc &DL); +    SDValue unfoldMaskedMerge(SDNode *N); +    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);      SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, -                          const SDLoc &DL, bool foldBooleans = true); +                          const 
SDLoc &DL, bool foldBooleans); +    SDValue rebuildSetCC(SDValue N);      bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,                             SDValue &CC) const; @@ -414,20 +425,21 @@ namespace {                                           unsigned HiOp);      SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);      SDValue CombineExtLoad(SDNode *N); +    SDValue CombineZExtLogicopShiftLoad(SDNode *N);      SDValue combineRepeatedFPDivisors(SDNode *N);      SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);      SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);      SDValue BuildSDIV(SDNode *N);      SDValue BuildSDIVPow2(SDNode *N);      SDValue BuildUDIV(SDNode *N); -    SDValue BuildLogBase2(SDValue Op, const SDLoc &DL); +    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);      SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);      SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);      SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);      SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); -    SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, +    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,                                  SDNodeFlags Flags, bool Reciprocal); -    SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, +    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,                                  SDNodeFlags Flags, bool Reciprocal);      SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,                                 bool DemandHighBits = true); @@ -442,13 +454,14 @@ namespace {      SDValue ReduceLoadOpStoreWidth(SDNode *N);      SDValue splitMergedValStore(StoreSDNode *ST);      SDValue TransformFPLoadStorePair(SDNode *N); +    SDValue convertBuildVecZextToZext(SDNode *N);      SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);      SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);      SDValue reduceBuildVecToShuffle(SDNode *N);      SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,                                    ArrayRef<int> VectorMask, SDValue VecIn1,                                    SDValue VecIn2, unsigned LeftIdx); -    SDValue matchVSelectOpSizesWithSetCC(SDNode *N); +    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);      /// Walk up chain skipping non-aliasing memory nodes,      /// looking for aliasing nodes and adding them to the Aliases vector. @@ -500,15 +513,15 @@ namespace {      bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,                            EVT LoadResultTy, EVT &ExtVT); -    /// Helper function to calculate whether the given Load can have its +    /// Helper function to calculate whether the given Load/Store can have its      /// width reduced to ExtVT. -    bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType, -                           EVT &ExtVT, unsigned ShAmt = 0); +    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType, +                           EVT &MemVT, unsigned ShAmt = 0);      /// Used by BackwardsPropagateMask to find suitable loads.      
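// Standalone sketch (plain C++, illustrative only): the new
// SimplifyDemandedVectorElts(SDValue) convenience overload above simply
// demands every lane by building an all-ones mask. A 64-lane-max model of
// that mask, with the helper name ours rather than LLVM's:
#include <cstdint>
constexpr uint64_t allLanesDemanded(unsigned NumElts) {
  // Mirrors APInt::getAllOnesValue(NumElts) for NumElts <= 64.
  return NumElts >= 64 ? ~0ull : ((1ull << NumElts) - 1ull);
}
static_assert(allLanesDemanded(4) == 0xF, "a 4-element vector demands lanes 0-3");
static_assert(allLanesDemanded(1) == 0x1, "single-element case");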
bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads, -                           SmallPtrSetImpl<SDNode*> &NodeWithConsts, -                           ConstantSDNode *Mask, SDNode *&UncombinedNode); +                           SmallPtrSetImpl<SDNode*> &NodesWithConsts, +                           ConstantSDNode *Mask, SDNode *&NodeToMask);      /// Attempt to propagate a given AND node back to load leaves so that they      /// can be combined into narrow loads.      bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG); @@ -530,23 +543,28 @@ namespace {      /// This is a helper function for MergeConsecutiveStores. Stores      /// that potentially may be merged with St are placed in -    /// StoreNodes. +    /// StoreNodes. RootNode is a chain predecessor to all store +    /// candidates.      void getStoreMergeCandidates(StoreSDNode *St, -                                 SmallVectorImpl<MemOpLink> &StoreNodes); +                                 SmallVectorImpl<MemOpLink> &StoreNodes, +                                 SDNode *&Root);      /// Helper function for MergeConsecutiveStores. Checks if      /// candidate stores have indirect dependency through their -    /// operands. \return True if safe to merge. +    /// operands. RootNode is the predecessor to all stores calculated +    /// by getStoreMergeCandidates and is used to prune the dependency check. +    /// \return True if safe to merge.      bool checkMergeStoreCandidatesForDependencies( -        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores); +        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, +        SDNode *RootNode);      /// Merge consecutive store operations into a wide store.      /// This optimization uses wide integers or vectors when possible.      /// \return number of stores that were merged into a merged store (the      /// affected nodes are stored as a prefix in \p StoreNodes). -    bool MergeConsecutiveStores(StoreSDNode *N); +    bool MergeConsecutiveStores(StoreSDNode *St); -    /// \brief Try to transform a truncation where C is a constant: +    /// Try to transform a truncation where C is a constant:      ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))      ///      /// \p N needs to be a truncation and its first operand an AND. Other @@ -554,6 +572,16 @@ namespace {      /// single-use) and if missed an empty SDValue is returned.      SDValue distributeTruncateThroughAnd(SDNode *N); +    /// Helper function to determine whether the target supports operation +    /// given by \p Opcode for type \p VT, that is, whether the operation +    /// is legal or custom before legalizing operations, and whether is +    /// legal (but not custom) after legalization. +    bool hasOperation(unsigned Opcode, EVT VT) { +      if (LegalOperations) +        return TLI.isOperationLegal(Opcode, VT); +      return TLI.isOperationLegalOrCustom(Opcode, VT); +    } +    public:      /// Runs the dag combiner on all nodes in the work list      void Run(CombineLevel AtLevel); @@ -564,11 +592,7 @@ namespace {      /// legalization these can be huge.      EVT getShiftAmountTy(EVT LHSTy) {        assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); -      if (LHSTy.isVector()) -        return LHSTy; -      auto &DL = DAG.getDataLayout(); -      return LegalTypes ? 
TLI.getScalarShiftAmountTy(DL, LHSTy) -                        : TLI.getPointerTy(DL); +      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);      }      /// This method returns true if we are running before type legalization or @@ -582,6 +606,10 @@ namespace {      EVT getSetCCResultType(EVT VT) const {        return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);      } + +    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, +                         SDValue OrigLoad, SDValue ExtLoad, +                         ISD::NodeType ExtType);    };  /// This class is a DAGUpdateListener that removes any deleted @@ -657,8 +685,13 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,    // fneg is removable even if it has multiple uses.    if (Op.getOpcode() == ISD::FNEG) return 2; -  // Don't allow anything with multiple uses. -  if (!Op.hasOneUse()) return 0; +  // Don't allow anything with multiple uses unless we know it is free. +  EVT VT = Op.getValueType(); +  const SDNodeFlags Flags = Op->getFlags(); +  if (!Op.hasOneUse()) +    if (!(Op.getOpcode() == ISD::FP_EXTEND && +          TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) +      return 0;    // Don't recurse exponentially.    if (Depth > 6) return 0; @@ -671,17 +704,15 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,      // Don't invert constant FP values after legalization unless the target says      // the negated constant is legal. -    EVT VT = Op.getValueType();      return TLI.isOperationLegal(ISD::ConstantFP, VT) ||        TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);    }    case ISD::FADD: -    // FIXME: determine better conditions for this xform. -    if (!Options->UnsafeFPMath) return 0; +    if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) +      return 0;      // After operation legalization, it might not be legal to create new FSUBs. -    if (LegalOperations && -        !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType())) +    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))        return 0;      // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) @@ -694,7 +725,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,    case ISD::FSUB:      // We can't turn -(A-B) into B-A when we honor signed zeros.      if (!Options->NoSignedZerosFPMath && -        !Op.getNode()->getFlags().hasNoSignedZeros()) +        !Flags.hasNoSignedZeros())        return 0;      // fold (fneg (fsub A, B)) -> (fsub B, A) @@ -702,8 +733,6 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,    case ISD::FMUL:    case ISD::FDIV: -    if (Options->HonorSignDependentRoundingFPMath()) return 0; -      // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))      if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,                                      Options, Depth + 1)) @@ -727,9 +756,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,    // fneg is removable even if it has multiple uses.    if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); -  // Don't allow anything with multiple uses. 
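// A standalone check (separate from the patch) of why the relaxed FADD case
// in isNegatibleForFree above still requires UnsafeFPMath or no-signed-zeros:
// under IEEE-754 with the default round-to-nearest mode, -(A + B) and
// (-A) - B can disagree on the sign of a zero result.
#include <cassert>
#include <cmath>
int main() {
  double A = 0.0, B = -0.0;
  double Folded = -(A + B);  // (+0.0) + (-0.0) == +0.0; negating gives -0.0
  double Xformed = (-A) - B; // (-0.0) - (-0.0) == +0.0
  assert(std::signbit(Folded) && !std::signbit(Xformed));
  return 0;
}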
-  assert(Op.hasOneUse() && "Unknown reuse!"); -    assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");    const SDNodeFlags Flags = Op.getNode()->getFlags(); @@ -742,8 +768,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,      return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());    }    case ISD::FADD: -    // FIXME: determine better conditions for this xform. -    assert(Options.UnsafeFPMath); +    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());      // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)      if (isNegatibleForFree(Op.getOperand(0), LegalOperations, @@ -769,8 +794,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,    case ISD::FMUL:    case ISD::FDIV: -    assert(!Options.HonorSignDependentRoundingFPMath()); -      // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)      if (isNegatibleForFree(Op.getOperand(0), LegalOperations,                             DAG.getTargetLoweringInfo(), &Options, Depth+1)) @@ -846,7 +869,13 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {    return false;  } -// \brief Returns the SDNode if it is a constant float BuildVector +static SDValue peekThroughBitcast(SDValue V) { +  while (V.getOpcode() == ISD::BITCAST) +    V = V.getOperand(0); +  return V; +} + +// Returns the SDNode if it is a constant float BuildVector  // or constant float.  static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {    if (isa<ConstantFPSDNode>(N)) @@ -880,6 +909,7 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {  // constant null integer (with no undefs).  // Build vector implicit truncation is not an issue for null values.  static bool isNullConstantOrNullSplatConstant(SDValue N) { +  // TODO: may want to use peekThroughBitcast() here.    if (ConstantSDNode *Splat = isConstOrConstSplat(N))      return Splat->isNullValue();    return false; @@ -889,6 +919,7 @@ static bool isNullConstantOrNullSplatConstant(SDValue N) {  // constant integer of one (with no undefs).  // Do not permit build vector implicit truncation.  static bool isOneConstantOrOneSplatConstant(SDValue N) { +  // TODO: may want to use peekThroughBitcast() here.    unsigned BitWidth = N.getScalarValueSizeInBits();    if (ConstantSDNode *Splat = isConstOrConstSplat(N))      return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth; @@ -899,6 +930,7 @@ static bool isOneConstantOrOneSplatConstant(SDValue N) {  // constant integer of all ones (with no undefs).  // Do not permit build vector implicit truncation.  static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) { +  N = peekThroughBitcast(N);    unsigned BitWidth = N.getScalarValueSizeInBits();    if (ConstantSDNode *Splat = isConstOrConstSplat(N))      return Splat->isAllOnesValue() && @@ -913,56 +945,6 @@ static bool isAnyConstantBuildVector(const SDNode *N) {           ISD::isBuildVectorOfConstantFPSDNodes(N);  } -// Attempt to match a unary predicate against a scalar/splat constant or -// every element of a constant BUILD_VECTOR. 
-static bool matchUnaryPredicate(SDValue Op, -                                std::function<bool(ConstantSDNode *)> Match) { -  if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) -    return Match(Cst); - -  if (ISD::BUILD_VECTOR != Op.getOpcode()) -    return false; - -  EVT SVT = Op.getValueType().getScalarType(); -  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { -    auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); -    if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) -      return false; -  } -  return true; -} - -// Attempt to match a binary predicate against a pair of scalar/splat constants -// or every element of a pair of constant BUILD_VECTORs. -static bool matchBinaryPredicate( -    SDValue LHS, SDValue RHS, -    std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) { -  if (LHS.getValueType() != RHS.getValueType()) -    return false; - -  if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) -    if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) -      return Match(LHSCst, RHSCst); - -  if (ISD::BUILD_VECTOR != LHS.getOpcode() || -      ISD::BUILD_VECTOR != RHS.getOpcode()) -    return false; - -  EVT SVT = LHS.getValueType().getScalarType(); -  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { -    auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i)); -    auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i)); -    if (!LHSCst || !RHSCst) -      return false; -    if (LHSCst->getValueType(0) != SVT || -        LHSCst->getValueType(0) != RHSCst->getValueType(0)) -      return false; -    if (!Match(LHSCst, RHSCst)) -      return false; -  } -  return true; -} -  SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,                                      SDValue N1) {    EVT VT = N0.getValueType(); @@ -1013,11 +995,9 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,                                 bool AddTo) {    assert(N->getNumValues() == NumTo && "Broken CombineTo call!");    ++NodesCombined; -  DEBUG(dbgs() << "\nReplacing.1 "; -        N->dump(&DAG); -        dbgs() << "\nWith: "; -        To[0].getNode()->dump(&DAG); -        dbgs() << " and " << NumTo-1 << " other values\n"); +  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: "; +             To[0].getNode()->dump(&DAG); +             dbgs() << " and " << NumTo - 1 << " other values\n");    for (unsigned i = 0, e = NumTo; i != e; ++i)      assert((!To[i].getNode() ||              N->getValueType(i) == To[i].getValueType()) && @@ -1074,11 +1054,33 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {    // Replace the old value with the new one.    ++NodesCombined; -  DEBUG(dbgs() << "\nReplacing.2 "; -        TLO.Old.getNode()->dump(&DAG); -        dbgs() << "\nWith: "; -        TLO.New.getNode()->dump(&DAG); -        dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); +             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); +             dbgs() << '\n'); + +  CommitTargetLoweringOpt(TLO); +  return true; +} + +/// Check the specified vector node value to see if it can be simplified or +/// if things it uses can be simplified as it only uses some of the elements. +/// If so, return true. 
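// The two matchers deleted above were not lost: they moved into the ISD
// namespace (later hunks call ISD::matchUnaryPredicate and
// ISD::matchBinaryPredicate). A generic standalone sketch of the unary
// contract, with std::vector standing in for a splat/BUILD_VECTOR of
// constants; this is illustrative only, not the LLVM API.
#include <algorithm>
#include <vector>
template <typename Pred>
static bool matchUnarySketch(const std::vector<int> &Elts, Pred Match) {
  // A scalar is modeled as a one-element vector; every element must match.
  return !Elts.empty() && std::all_of(Elts.begin(), Elts.end(), Match);
}
// e.g. matchUnarySketch({2, 4, 8}, [](int C) { return (C & (C - 1)) == 0; })
// mirrors the IsPowerOfTwo-style uses that appear later in this patch.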
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded, +                                             bool AssumeSingleUse) { +  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); +  APInt KnownUndef, KnownZero; +  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO, +                                      0, AssumeSingleUse)) +    return false; + +  // Revisit the node. +  AddToWorklist(Op.getNode()); + +  // Replace the old value with the new one. +  ++NodesCombined; +  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); +             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); +             dbgs() << '\n');    CommitTargetLoweringOpt(TLO);    return true; @@ -1089,11 +1091,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {    EVT VT = Load->getValueType(0);    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0)); -  DEBUG(dbgs() << "\nReplacing.9 "; -        Load->dump(&DAG); -        dbgs() << "\nWith: "; -        Trunc.getNode()->dump(&DAG); -        dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: "; +             Trunc.getNode()->dump(&DAG); dbgs() << '\n');    WorklistRemover DeadNodes(*this);    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); @@ -1107,10 +1106,8 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {    if (ISD::isUNINDEXEDLoad(Op.getNode())) {      LoadSDNode *LD = cast<LoadSDNode>(Op);      EVT MemVT = LD->getMemoryVT(); -    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) -      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD -                                                       : ISD::EXTLOAD) -      : LD->getExtensionType(); +    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? 
ISD::EXTLOAD +                                                      : LD->getExtensionType();      Replace = true;      return DAG.getExtLoad(ExtType, DL, PVT,                            LD->getChain(), LD->getBasePtr(), @@ -1194,7 +1191,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {    if (TLI.IsDesirableToPromoteOp(Op, PVT)) {      assert(PVT != VT && "Don't know what type to promote to!"); -    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); +    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));      bool Replace0 = false;      SDValue N0 = Op.getOperand(0); @@ -1259,7 +1256,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {    if (TLI.IsDesirableToPromoteOp(Op, PVT)) {      assert(PVT != VT && "Don't know what type to promote to!"); -    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); +    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));      bool Replace = false;      SDValue N0 = Op.getOperand(0); @@ -1311,8 +1308,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) {      // fold (aext (aext x)) -> (aext x)      // fold (aext (zext x)) -> (zext x)      // fold (aext (sext x)) -> (sext x) -    DEBUG(dbgs() << "\nPromoting "; -          Op.getNode()->dump(&DAG)); +    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));      return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));    }    return SDValue(); @@ -1345,20 +1341,15 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {      SDNode *N = Op.getNode();      LoadSDNode *LD = cast<LoadSDNode>(N);      EVT MemVT = LD->getMemoryVT(); -    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) -      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD -                                                       : ISD::EXTLOAD) -      : LD->getExtensionType(); +    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD +                                                      : LD->getExtensionType();      SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,                                     LD->getChain(), LD->getBasePtr(),                                     MemVT, LD->getMemOperand());      SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); -    DEBUG(dbgs() << "\nPromoting "; -          N->dump(&DAG); -          dbgs() << "\nTo: "; -          Result.getNode()->dump(&DAG); -          dbgs() << '\n'); +    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: "; +               Result.getNode()->dump(&DAG); dbgs() << '\n');      WorklistRemover DeadNodes(*this);      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); @@ -1369,7 +1360,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {    return false;  } -/// \brief Recursively delete a node which has no uses and any operands for +/// Recursively delete a node which has no uses and any operands for  /// which it is the only use.  ///  /// Note that this both deletes the nodes and removes them from the worklist. @@ -1453,7 +1444,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {          continue;      } -    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); +    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));      // Add any operands of the new node which have not yet been combined to the      // worklist as well. 
Because the worklist uniques things already, this @@ -1481,8 +1472,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
            RV.getOpcode() != ISD::DELETED_NODE &&
            "Node was deleted but visit returned new node!");
-    DEBUG(dbgs() << " ... into: ";
-          RV.getNode()->dump(&DAG));
+    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
     if (N->getNumValues() == RV.getNode()->getNumValues())
       DAG.ReplaceAllUsesWith(N, RV.getNode());
@@ -1558,7 +1548,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::VSELECT:            return visitVSELECT(N);
   case ISD::SELECT_CC:          return visitSELECT_CC(N);
   case ISD::SETCC:              return visitSETCC(N);
-  case ISD::SETCCE:             return visitSETCCE(N);
   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
@@ -1708,6 +1697,10 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
       return N->getOperand(1);
   }
+  // Don't simplify token factors if optnone.
+  if (OptLevel == CodeGenOpt::None)
+    return SDValue();
+
   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
   SmallPtrSet<SDNode*, 16> SeenOps;
@@ -1893,16 +1886,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
          "Unexpected binary operator");
-  // Bail out if any constants are opaque because we can't constant fold those.
-  SDValue C1 = BO->getOperand(1);
-  if (!isConstantOrConstantVector(C1, true) &&
-      !isConstantFPBuildVectorOrConstantFP(C1))
-    return SDValue();
-
   // Don't do this unless the old select is going away. We want to eliminate the
   // binary operator, not replace a binop with a select.
   // TODO: Handle ISD::SELECT_CC.
+  unsigned SelOpNo = 0;
   SDValue Sel = BO->getOperand(0);
+  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
+    SelOpNo = 1;
+    Sel = BO->getOperand(1);
+  }
+
   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
     return SDValue();
@@ -1916,19 +1909,48 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
       !isConstantFPBuildVectorOrConstantFP(CF))
     return SDValue();
+  // Bail out if any constants are opaque because we can't constant fold those.
+  // The exception is "and" and "or" with either 0 or -1, in which case we can
+  // propagate non-constant operands into the select. I.e.:
+  // and (select Cond, 0, -1), X --> select Cond, 0, X
+  // or X, (select Cond, -1, 0) --> select Cond, -1, X
+  bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+                         (isNullConstantOrNullSplatConstant(CT) ||
+                          isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
+                         (isNullConstantOrNullSplatConstant(CF) ||
+                          isAllOnesConstantOrAllOnesSplatConstant(CF));
+
+  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
+  if (!CanFoldNonConst &&
+      !isConstantOrConstantVector(CBO, true) &&
+      !isConstantFPBuildVectorOrConstantFP(CBO))
+    return SDValue();
+
+  EVT VT = Sel.getValueType();
+
+  // For a shift, the value and the shift amount may have different VTs; for
+  // instance, on x86 the shift amount is i8 regardless of the LHS type. Bail
+  // out if we have swapped operands and the value types do not match.
+  // NB: x86 is still fine if the operands are not swapped and the shift amount
+  // VT is no bigger than that of the shifted value.
+  // TODO: it is possible to check for a shift operation, correct the VTs and
+  // still perform the optimization on x86 if needed.
+  if (SelOpNo && VT != CBO.getValueType())
+    return SDValue();
+
   // We have a select-of-constants followed by a binary operator with a
   // constant. Eliminate the binop by pulling the constant math into the select.
-  // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
-  EVT VT = Sel.getValueType();
+  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
   SDLoc DL(Sel);
-  SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
-  if (!NewCT.isUndef() &&
+  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
+                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
+  if (!CanFoldNonConst && !NewCT.isUndef() &&
       !isConstantOrConstantVector(NewCT, true) &&
       !isConstantFPBuildVectorOrConstantFP(NewCT))
     return SDValue();
-  SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
-  if (!NewCF.isUndef() &&
+  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
+                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
+  if (!CanFoldNonConst && !NewCF.isUndef() &&
       !isConstantOrConstantVector(NewCF, true) &&
       !isConstantFPBuildVectorOrConstantFP(NewCF))
     return SDValue();
@@ -1936,6 +1958,84 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
 }
+static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
+  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+         "Expecting add or sub");
+
+  // Match a constant operand and a zext operand for the math instruction:
+  // add Z, C
+  // sub C, Z
+  bool IsAdd = N->getOpcode() == ISD::ADD;
+  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
+  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
+  auto *CN = dyn_cast<ConstantSDNode>(C);
+  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
+    return SDValue();
+
+  // Match the zext operand as a setcc of a boolean.
+  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
+      Z.getOperand(0).getValueType() != MVT::i1)
+    return SDValue();
+
+  // Match the compare as: setcc (X & 1), 0, eq.
+  SDValue SetCC = Z.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
+      SetCC.getOperand(0).getOpcode() != ISD::AND ||
+      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
+    return SDValue();
+
+  // We are adding/subtracting a constant and an inverted low bit. Turn that
+  // into a subtract/add of the low bit with incremented/decremented constant:
+  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
+  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
+  EVT VT = C.getValueType();
+  SDLoc DL(N);
+  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
+  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
+                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
+  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
+}
+
+/// Try to fold a 'not' of a shifted sign-bit, used in an add/sub with a
+/// constant operand, into a shift and add with a different constant.
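// Standalone constexpr spot check (helper name ours, not from the patch) of
// the constant adjustment performed by foldAddSubBoolOfMaskedVal above. The
// unsigned model makes the mod-2^32 wraparound explicit, matching the
// fixed-width semantics of the DAG nodes.
#include <cstdint>
constexpr bool checkBoolOfMaskedVal(uint32_t C) {
  for (uint32_t X = 0; X < 4; ++X) {
    uint32_t Z = ((X & 1) == 0) ? 1u : 0u; // zext i1 (seteq (X & 1), 0)
    if (Z + C != (C + 1) - (X & 1))        // add Z, C --> sub C+1, (X & 1)
      return false;
    if (C - Z != (C - 1) + (X & 1))        // sub C, Z --> add C-1, (X & 1)
      return false;
  }
  return true;
}
static_assert(checkBoolOfMaskedVal(0u) && checkBoolOfMaskedVal(41u) &&
                  checkBoolOfMaskedVal(0xFFFFFFFFu),
              "the inverted low bit folds into the constant");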
+static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { +  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && +         "Expecting add or sub"); + +  // We need a constant operand for the add/sub, and the other operand is a +  // logical shift right: add (srl), C or sub C, (srl). +  bool IsAdd = N->getOpcode() == ISD::ADD; +  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0); +  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1); +  ConstantSDNode *C = isConstOrConstSplat(ConstantOp); +  if (!C || ShiftOp.getOpcode() != ISD::SRL) +    return SDValue(); + +  // The shift must be of a 'not' value. +  // TODO: Use isBitwiseNot() if it works with vectors. +  SDValue Not = ShiftOp.getOperand(0); +  if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR || +      !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1))) +    return SDValue(); + +  // The shift must be moving the sign bit to the least-significant-bit. +  EVT VT = ShiftOp.getValueType(); +  SDValue ShAmt = ShiftOp.getOperand(1); +  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); +  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1) +    return SDValue(); + +  // Eliminate the 'not' by adjusting the shift and add/sub constant: +  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1) +  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1) +  SDLoc DL(N); +  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL; +  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt); +  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1; +  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT)); +} +  SDValue DAGCombiner::visitADD(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1); @@ -2067,6 +2167,12 @@ SDValue DAGCombiner::visitADD(SDNode *N) {                           DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));    } +  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) +    return V; + +  if (SDValue V = foldAddSubOfSignBit(N, DAG)) +    return V; +    if (SimplifyDemandedBits(SDValue(N, 0)))      return SDValue(N, 0); @@ -2075,6 +2181,11 @@ SDValue DAGCombiner::visitADD(SDNode *N) {        DAG.haveNoCommonBitsSet(N0, N1))      return DAG.getNode(ISD::OR, DL, VT, N0, N1); +  // fold (add (xor a, -1), 1) -> (sub 0, a) +  if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) +    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), +                       N0.getOperand(0)); +    if (SDValue Combined = visitADDLike(N0, N1, N))      return Combined; @@ -2210,6 +2321,38 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {    return SDValue();  } +static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT, +                           SelectionDAG &DAG, const TargetLowering &TLI) { +  SDValue Cst; +  switch (TLI.getBooleanContents(VT)) { +  case TargetLowering::ZeroOrOneBooleanContent: +  case TargetLowering::UndefinedBooleanContent: +    Cst = DAG.getConstant(1, DL, VT); +    break; +  case TargetLowering::ZeroOrNegativeOneBooleanContent: +    Cst = DAG.getConstant(-1, DL, VT); +    break; +  } + +  return DAG.getNode(ISD::XOR, DL, VT, V, Cst); +} + +static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) { +  if (V.getOpcode() != ISD::XOR) return false; +  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1)); +  if (!Const) return false; + +  switch(TLI.getBooleanContents(VT)) { +    case TargetLowering::ZeroOrOneBooleanContent: +     
 return Const->isOne(); +    case TargetLowering::ZeroOrNegativeOneBooleanContent: +      return Const->isAllOnesValue(); +    case TargetLowering::UndefinedBooleanContent: +      return (Const->getAPIntValue() & 0x01) == 1; +  } +  llvm_unreachable("Unsupported boolean content"); +} +  SDValue DAGCombiner::visitUADDO(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1); @@ -2240,6 +2383,15 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {      return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),                       DAG.getConstant(0, DL, CarryVT)); +  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry. +  if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) { +    SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(), +                              DAG.getConstant(0, DL, VT), +                              N0.getOperand(0)); +    return CombineTo(N, Sub, +                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI)); +  } +    if (SDValue Combined = visitUADDOLike(N0, N1, N))      return Combined; @@ -2303,13 +2455,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {      return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);    // fold (addcarry x, y, false) -> (uaddo x, y) -  if (isNullConstant(CarryIn)) -    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1); +  if (isNullConstant(CarryIn)) { +    if (!LegalOperations || +        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0))) +      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1); +  } + +  EVT CarryVT = CarryIn.getValueType();    // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.    if (isNullConstant(N0) && isNullConstant(N1)) {      EVT VT = N0.getValueType(); -    EVT CarryVT = CarryIn.getValueType();      SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);      AddToWorklist(CarryExt.getNode());      return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt, @@ -2317,6 +2473,16 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {                       DAG.getConstant(0, DL, CarryVT));    } +  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry. 
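// The (add (xor a, -1), 1) -> (sub 0, a) and matching uaddo folds above are
// the two's-complement identity ~a + 1 == -a. A standalone constexpr check,
// in an unsigned model so the wraparound is well-defined:
#include <cstdint>
#include <initializer_list>
constexpr bool checkNotPlusOneIsNeg() {
  for (uint32_t A : {0u, 1u, 42u, 0x80000000u, 0xFFFFFFFFu})
    if (~A + 1u != 0u - A)
      return false;
  return true;
}
static_assert(checkNotPlusOneIsNeg(), "(add (xor a, -1), 1) == (sub 0, a)");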
+  if (isBitwiseNot(N0) && isNullConstant(N1) && +      isBooleanFlip(CarryIn, CarryVT, TLI)) { +    SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), +                              DAG.getConstant(0, DL, N0.getValueType()), +                              N0.getOperand(0), CarryIn.getOperand(0)); +    return CombineTo(N, Sub, +                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI)); +  } +    if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))      return Combined; @@ -2458,6 +2624,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {    if (isAllOnesConstantOrAllOnesSplatConstant(N0))      return DAG.getNode(ISD::XOR, DL, VT, N1, N0); +  // fold (A - (0-B)) -> A+B +  if (N1.getOpcode() == ISD::SUB && +      isNullConstantOrNullSplatConstant(N1.getOperand(0))) +    return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1)); +    // fold A-(A-B) -> B    if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))      return N1.getOperand(1); @@ -2500,12 +2671,50 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),                         N0.getOperand(1).getOperand(0)); +  // fold (X - (-Y * Z)) -> (X + (Y * Z)) +  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { +    if (N1.getOperand(0).getOpcode() == ISD::SUB && +        isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) { +      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, +                                N1.getOperand(0).getOperand(1), +                                N1.getOperand(1)); +      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); +    } +    if (N1.getOperand(1).getOpcode() == ISD::SUB && +        isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) { +      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, +                                N1.getOperand(0), +                                N1.getOperand(1).getOperand(1)); +      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); +    } +  } +    // If either operand of a sub is undef, the result is undef    if (N0.isUndef())      return N0;    if (N1.isUndef())      return N1; +  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) +    return V; + +  if (SDValue V = foldAddSubOfSignBit(N, DAG)) +    return V; + +  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X) +  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { +    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) { +      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1); +      SDValue S0 = N1.getOperand(0); +      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) { +        unsigned OpSizeInBits = VT.getScalarSizeInBits(); +        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) +          if (C->getAPIntValue() == (OpSizeInBits - 1)) +            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); +      } +    } +  } +    // If the relocation model supports it, consider symbol offsets.    
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))      if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { @@ -2612,8 +2821,11 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {    SDValue CarryIn = N->getOperand(2);    // fold (subcarry x, y, false) -> (usubo x, y) -  if (isNullConstant(CarryIn)) -    return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1); +  if (isNullConstant(CarryIn)) { +    if (!LegalOperations || +        TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0))) +      return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1); +  }    return SDValue();  } @@ -2689,11 +2901,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {        (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {      SDLoc DL(N);      SDValue LogBase2 = BuildLogBase2(N1, DL); -    AddToWorklist(LogBase2.getNode()); -      EVT ShiftVT = getShiftAmountTy(N0.getValueType());      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); -    AddToWorklist(Trunc.getNode());      return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);    }    // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c @@ -2816,9 +3025,10 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {    SDValue Op1 = Node->getOperand(1);    SDValue combined;    for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), -         UE = Op0.getNode()->use_end(); UI != UE;) { -    SDNode *User = *UI++; -    if (User == Node || User->use_empty()) +         UE = Op0.getNode()->use_end(); UI != UE; ++UI) { +    SDNode *User = *UI; +    if (User == Node || User->getOpcode() == ISD::DELETED_NODE || +        User->use_empty())        continue;      // Convert the other matching node(s), too;      // otherwise, the DIVREM may get target-legalized into something @@ -2868,6 +3078,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1);    EVT VT = N->getValueType(0); +  EVT CCVT = getSetCCResultType(VT);    // fold vector ops    if (VT.isVector()) @@ -2887,6 +3098,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {    // fold (sdiv X, -1) -> 0-X    if (N1C && N1C->isAllOnesValue())      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); +  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) +  if (N1C && N1C->getAPIntValue().isMinSignedValue()) +    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), +                         DAG.getConstant(1, DL, VT), +                         DAG.getConstant(0, DL, VT));    if (SDValue V = simplifyDivRem(N, DAG))      return V; @@ -2899,45 +3115,90 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))      return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); +  if (SDValue V = visitSDIVLike(N0, N1, N)) +    return V; + +  // sdiv, srem -> sdivrem +  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is +  // true.  Otherwise, we break the simplification logic in visitREM(). 
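// The (abs X) recognition added to visitSUB above uses the classic shift
// trick: Y = X >>s (bits-1) is 0 or -1, so (X ^ Y) - Y conditionally negates
// X. A standalone check; it assumes the usual arithmetic right shift for
// int32_t (only guaranteed by the standard from C++20, universal in practice):
#include <cstdint>
constexpr int32_t absViaShift(int32_t X) {
  int32_t Y = X >> 31; // sra X, size(X)-1: all-zeros or all-ones
  return (X ^ Y) - Y;  // flips all bits and adds 1 iff X < 0
}
static_assert(absViaShift(7) == 7 && absViaShift(-7) == 7 && absViaShift(0) == 0,
              "sub (xor X, Y), Y computes abs X");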
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); +  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) +    if (SDValue DivRem = useDivRem(N)) +        return DivRem; + +  return SDValue(); +} + +SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { +  SDLoc DL(N); +  EVT VT = N->getValueType(0); +  EVT CCVT = getSetCCResultType(VT); +  unsigned BitWidth = VT.getScalarSizeInBits(); + +  ConstantSDNode *N1C = isConstOrConstSplat(N1); + +  // Helper for determining whether a value is a power-2 constant scalar or a +  // vector of such elements. +  auto IsPowerOfTwo = [](ConstantSDNode *C) { +    if (C->isNullValue() || C->isOpaque()) +      return false; +    if (C->getAPIntValue().isPowerOf2()) +      return true; +    if ((-C->getAPIntValue()).isPowerOf2()) +      return true; +    return false; +  }; +    // fold (sdiv X, pow2) -> simple ops after legalize    // FIXME: We check for the exact bit here because the generic lowering gives    // better results in that case. The target-specific lowering should learn how    // to handle exact sdivs efficiently. -  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && -      !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() || -                                    (-N1C->getAPIntValue()).isPowerOf2())) { +  if (!N->getFlags().hasExact() && +      ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) {      // Target-specific implementation of sdiv x, pow2.      if (SDValue Res = BuildSDIVPow2(N))        return Res; -    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); +    // Create constants that are functions of the shift amount value. +    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); +    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy); +    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1); +    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy); +    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1); +    if (!isConstantOrConstantVector(Inexact)) +      return SDValue();      // Splat the sign bit into the register -    SDValue SGN = -        DAG.getNode(ISD::SRA, DL, VT, N0, -                    DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, -                                    getShiftAmountTy(N0.getValueType()))); -    AddToWorklist(SGN.getNode()); +    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0, +                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy)); +    AddToWorklist(Sign.getNode());      // Add (N0 < 0) ? abs2 - 1 : 0; -    SDValue SRL = -        DAG.getNode(ISD::SRL, DL, VT, SGN, -                    DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, -                                    getShiftAmountTy(SGN.getValueType()))); -    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); -    AddToWorklist(SRL.getNode()); -    AddToWorklist(ADD.getNode());    // Divide by pow2 -    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, -                  DAG.getConstant(lg2, DL, -                                  getShiftAmountTy(ADD.getValueType()))); - -    // If we're dividing by a positive value, we're done.  Otherwise, we must -    // negate the result. 
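// A standalone model (helper name ours, separate from the patch) of the
// srl/add/sra sequence emitted in visitSDIVLike: biasing a negative dividend
// by 2^K - 1 makes the final arithmetic shift round toward zero, as sdiv
// requires. Valid for 1 <= K <= 31; assumes arithmetic >> on int32_t.
#include <cstdint>
constexpr int32_t sdivByPow2(int32_t X, unsigned K) {
  uint32_t Sign = static_cast<uint32_t>(X >> 31); // splat the sign bit (sra)
  uint32_t Bias = Sign >> (32 - K);               // 2^K - 1 if X < 0, else 0 (srl)
  return (X + static_cast<int32_t>(Bias)) >> K;   // final sra performs the divide
}
static_assert(sdivByPow2(7, 1) == 7 / 2 && sdivByPow2(-7, 1) == -7 / 2 &&
                  sdivByPow2(-9, 2) == -9 / 4,
              "rounds toward zero, matching sdiv");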
-    if (N1C->getAPIntValue().isNonNegative())
-      return SRA;
-
-    AddToWorklist(SRA.getNode());
-    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
+    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
+    AddToWorklist(Srl.getNode());
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
+    AddToWorklist(Add.getNode());
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
+    AddToWorklist(Sra.getNode());
+
+    // Special case: (sdiv X, 1) -> X
+    // Special case: (sdiv X, -1) -> 0-X
+    SDValue One = DAG.getConstant(1, DL, VT);
+    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
+    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
+    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
+    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
+    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
+
+    // If dividing by a positive value, we're done. Otherwise, the result must
+    // be negated.
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
+
+    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
+    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
+    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
+    return Res;
   }
 
   // If integer divide is expensive and we satisfy the requirements, emit an
@@ -2948,13 +3209,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
     if (SDValue Op = BuildSDIV(N))
       return Op;
-  // sdiv, srem -> sdivrem
-  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
-  // true.  Otherwise, we break the simplification logic in visitREM().
-  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
-    if (SDValue DivRem = useDivRem(N))
-        return DivRem;
-
   return SDValue();
 }
@@ -2962,6 +3216,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
+  EVT CCVT = getSetCCResultType(VT);
   // fold vector ops
   if (VT.isVector())
@@ -2977,6 +3232,14 @@
     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                     N0C, N1C))
       return Folded;
+  // fold (udiv X, 1) -> X
+  if (N1C && N1C->isOne())
+    return N0;
+  // fold (udiv X, -1) -> select(X == -1, 1, 0)
+  if (N1C && N1C->getAPIntValue().isAllOnesValue())
+    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+                         DAG.getConstant(1, DL, VT),
+                         DAG.getConstant(0, DL, VT));
   if (SDValue V = simplifyDivRem(N, DAG))
     return V;
@@ -2984,6 +3247,26 @@
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
+  if (SDValue V = visitUDIVLike(N0, N1, N))
+    return V;
+
+  // udiv, urem -> udivrem
+  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+  // true.  Otherwise, we break the simplification logic in visitREM().
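// The new visitUDIV boundary folds above, restated in plain C++ as a spot
// check (not part of the patch): unsigned division by the all-ones value
// yields 1 exactly when the dividend is itself all ones, else 0.
static_assert(0u / 0xFFFFFFFFu == 0 && 5u / 0xFFFFFFFFu == 0 &&
                  0xFFFFFFFFu / 0xFFFFFFFFu == 1,
              "udiv X, -1 == select(X == -1, 1, 0)");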
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); +  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) +    if (SDValue DivRem = useDivRem(N)) +        return DivRem; + +  return SDValue(); +} + +SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { +  SDLoc DL(N); +  EVT VT = N->getValueType(0); + +  ConstantSDNode *N1C = isConstOrConstSplat(N1); +    // fold (udiv x, (1 << c)) -> x >>u c    if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&        DAG.isKnownToBeAPowerOfTwo(N1)) { @@ -3019,13 +3302,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {      if (SDValue Op = BuildUDIV(N))        return Op; -  // sdiv, srem -> sdivrem -  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is -  // true.  Otherwise, we break the simplification logic in visitREM(). -  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) -    if (SDValue DivRem = useDivRem(N)) -        return DivRem; -    return SDValue();  } @@ -3035,6 +3311,8 @@ SDValue DAGCombiner::visitREM(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1);    EVT VT = N->getValueType(0); +  EVT CCVT = getSetCCResultType(VT); +    bool isSigned = (Opcode == ISD::SREM);    SDLoc DL(N); @@ -3044,6 +3322,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) {    if (N0C && N1C)      if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))        return Folded; +  // fold (urem X, -1) -> select(X == -1, 0, x) +  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue()) +    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), +                         DAG.getConstant(0, DL, VT), N0);    if (SDValue V = simplifyDivRem(N, DAG))      return V; @@ -3077,22 +3359,19 @@ SDValue DAGCombiner::visitREM(SDNode *N) {    // If X/C can be simplified by the division-by-constant logic, lower    // X%C to the equivalent of X-X/C*C. -  // To avoid mangling nodes, this simplification requires that the combine() -  // call for the speculative DIV must not cause a DIVREM conversion.  We guard -  // against this by skipping the simplification if isIntDivCheap().  When -  // div is not cheap, combine will not return a DIVREM.  Regardless, -  // checking cheapness here makes sense since the simplification results in -  // fatter code. -  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) { -    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; -    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1); -    AddToWorklist(Div.getNode()); -    SDValue OptimizedDiv = combine(Div.getNode()); -    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { -      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) && -             (OptimizedDiv.getOpcode() != ISD::SDIVREM)); +  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the +  // speculative DIV must not cause a DIVREM conversion.  We guard against this +  // by skipping the simplification if isIntDivCheap().  When div is not cheap, +  // combine will not return a DIVREM.  Regardless, checking cheapness here +  // makes sense since the simplification results in fatter code. +  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) { +    SDValue OptimizedDiv = +        isSigned ? 
visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
+    if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM &&
+        OptimizedDiv.getOpcode() != ISD::SDIVREM) {
       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+      AddToWorklist(OptimizedDiv.getNode());
       AddToWorklist(Mul.getNode());
       return Sub;
     }
@@ -3350,6 +3629,25 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
+  // Only do this if the current op isn't legal and the flipped one is.
+  unsigned Opcode = N->getOpcode();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isOperationLegal(Opcode, VT) &&
+      (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
+      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
+    unsigned AltOpcode;
+    switch (Opcode) {
+    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
+    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
+    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
+    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
+    default: llvm_unreachable("Unknown MINMAX opcode");
+    }
+    if (TLI.isOperationLegal(AltOpcode, VT))
+      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
+  }
+
   return SDValue();
 }
@@ -3469,9 +3767,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
           ShOp = SDValue();
       }
-      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
-      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
-      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
+      // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
+      // (OR  (shuf (A, C), shuf (B, C))) -> shuf (OR  (A, B), C)
+      // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                       N0->getOperand(0), N1->getOperand(0));
@@ -3490,9 +3788,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
           ShOp = SDValue();
       }
-      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
-      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
-      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
+      // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
+      // (OR  (shuf (C, A), shuf (C, B))) -> shuf (C, OR  (A, B))
+      // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                       N0->getOperand(1), N1->getOperand(1));
@@ -3525,7 +3823,7 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
   // operations on the left and right operands, so those types must match.
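// The visitIMINMAX signedness flip above is sound because signed and unsigned
// orderings coincide once both sign bits are known zero. A small standalone
// constexpr check over non-negative samples (illustrative only):
#include <cstdint>
#include <initializer_list>
constexpr bool minFlipAgrees() {
  for (int32_t A : {0, 1, 7, 0x7FFFFFFF})
    for (int32_t B : {0, 2, 0x7FFFFFFF}) {
      uint32_t UA = static_cast<uint32_t>(A), UB = static_cast<uint32_t>(B);
      if ((A < B ? A : B) != static_cast<int32_t>(UA < UB ? UA : UB))
        return false;
    }
  return true;
}
static_assert(minFlipAgrees(), "smin == umin when both sign bits are zero");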
EVT VT = N0.getValueType();    EVT OpVT = LL.getValueType(); -  if (LegalOperations || VT != MVT::i1) +  if (LegalOperations || VT.getScalarType() != MVT::i1)      if (VT != getSetCCResultType(OpVT))        return SDValue();    if (OpVT != RL.getValueType()) @@ -3762,53 +4060,78 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,    return true;  } -bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType, -                                    EVT &ExtVT, unsigned ShAmt) { -  // Don't transform one with multiple uses, this would require adding a new -  // load. -  if (!SDValue(LoadN, 0).hasOneUse()) +bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, +                                    ISD::LoadExtType ExtType, EVT &MemVT, +                                    unsigned ShAmt) { +  if (!LDST)      return false; - -  if (LegalOperations && -      !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT)) +  // Only allow byte offsets. +  if (ShAmt % 8)      return false;    // Do not generate loads of non-round integer types since these can    // be expensive (and would be wrong if the type is not byte sized). -  if (!ExtVT.isRound()) +  if (!MemVT.isRound())      return false;    // Don't change the width of a volatile load. -  if (LoadN->isVolatile()) +  if (LDST->isVolatile())      return false;    // Verify that we are actually reducing a load width here. -  if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits()) +  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())      return false; -  // For the transform to be legal, the load must produce only two values -  // (the value loaded and the chain).  Don't transform a pre-increment -  // load, for example, which produces an extra value.  Otherwise the -  // transformation is not equivalent, and the downstream logic to replace -  // uses gets things wrong. -  if (LoadN->getNumValues() > 2) -    return false; - -  // If the load that we're shrinking is an extload and we're not just -  // discarding the extension we can't simply shrink the load. Bail. -  // TODO: It would be possible to merge the extensions in some cases. -  if (LoadN->getExtensionType() != ISD::NON_EXTLOAD && -      LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) -    return false; - -  if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT)) +  // Ensure that this isn't going to produce an unsupported unaligned access. +  if (ShAmt && +      !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, +                              LDST->getAddressSpace(), ShAmt / 8))      return false;    // It's not possible to generate a constant of extended or untyped type. -  EVT PtrType = LoadN->getOperand(1).getValueType(); +  EVT PtrType = LDST->getBasePtr().getValueType();    if (PtrType == MVT::Untyped || PtrType.isExtended())      return false; +  if (isa<LoadSDNode>(LDST)) { +    LoadSDNode *Load = cast<LoadSDNode>(LDST); +    // Don't transform one with multiple uses, this would require adding a new +    // load. +    if (!SDValue(Load, 0).hasOneUse()) +      return false; + +    if (LegalOperations && +        !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT)) +      return false; + +    // For the transform to be legal, the load must produce only two values +    // (the value loaded and the chain).  Don't transform a pre-increment +    // load, for example, which produces an extra value.  
Otherwise the
+    // transformation is not equivalent, and the downstream logic to replace
+    // uses gets things wrong.
+    if (Load->getNumValues() > 2)
+      return false;
+
+    // If the load that we're shrinking is an extload and we're not just
+    // discarding the extension we can't simply shrink the load. Bail.
+    // TODO: It would be possible to merge the extensions in some cases.
+    if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
+        Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
+      return false;
+
+    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
+      return false;
+  } else {
+    assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
+    StoreSDNode *Store = cast<StoreSDNode>(LDST);
+    // Can't write outside the original store
+    if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
+      return false;
+
+    if (LegalOperations &&
+        !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
+      return false;
+  }
   return true;
 }
@@ -3841,7 +4164,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
       auto *Load = cast<LoadSDNode>(Op);
       EVT ExtVT;
       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
-          isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
+          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
         // ZEXTLOAD is already small enough.
         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
@@ -3882,7 +4205,23 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
     // Allow one node which will be masked along with any loads found.
     if (NodeToMask)
       return false;
+
+    // Also ensure that the node to be masked only produces one data result.
     NodeToMask = Op.getNode();
+    if (NodeToMask->getNumValues() > 1) {
+      bool HasValue = false;
+      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
+        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
+        if (VT != MVT::Glue && VT != MVT::Other) {
+          if (HasValue) {
+            NodeToMask = nullptr;
+            return false;
+          }
+          HasValue = true;
+        }
+      }
+      assert(HasValue && "Node to be masked has no data result?");
+    }
   }
   return true;
 }
@@ -3906,19 +4245,19 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
     if (Loads.size() == 0)
       return false;
-    DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
+    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
     SDValue MaskOp = N->getOperand(1);
     // If it exists, fix up the single node we allow in the tree that needs
     // masking.
     if (FixupNode) {
-      DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
+      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                 FixupNode->getValueType(0),
                                 SDValue(FixupNode, 0), MaskOp);
       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
-      DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
-                             MaskOp);
+      if (And.getOpcode() == ISD::AND)
+        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
     }
     // Narrow any constants that need it.
@@ -3937,11 +4276,13 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
     // Create narrow loads.
    for (auto *Load : Loads) {
-      DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
+      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                 SDValue(Load, 0), MaskOp);
       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
-      DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
+      if (And.getOpcode() == ISD::AND)
+        And = SDValue(
+            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
       SDValue NewLoad = ReduceLoadWidth(And.getNode());
       assert(NewLoad &&
              "Shouldn't be masking the load if it can't be narrowed");
@@ -3953,6 +4294,60 @@
   return false;
 }
 
+// Unfold
+//    x &  (-1 'logical shift' y)
+// To
+//    (x 'opposite logical shift' y) 'logical shift' y
+// if it is better for performance.
+SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
+  assert(N->getOpcode() == ISD::AND);
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Do we actually prefer shifts over mask?
+  if (!TLI.preferShiftsToClearExtremeBits(N0))
+    return SDValue();
+
+  // Try to match  (-1 '[outer] logical shift' y)
+  unsigned OuterShift;
+  unsigned InnerShift; // The opposite direction to the OuterShift.
+  SDValue Y;           // Shift amount.
+  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
+    if (!M.hasOneUse())
+      return false;
+    OuterShift = M->getOpcode();
+    if (OuterShift == ISD::SHL)
+      InnerShift = ISD::SRL;
+    else if (OuterShift == ISD::SRL)
+      InnerShift = ISD::SHL;
+    else
+      return false;
+    if (!isAllOnesConstant(M->getOperand(0)))
+      return false;
+    Y = M->getOperand(1);
+    return true;
+  };
+
+  SDValue X;
+  if (matchMask(N1))
+    X = N0;
+  else if (matchMask(N0))
+    X = N1;
+  else
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  //     tmp = x   'opposite logical shift' y
+  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
+  //     ret = tmp 'logical shift' y
+  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
+
+  return T1;
+}
+
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -4019,7 +4414,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
   };
   if (N0.getOpcode() == ISD::OR &&
-      matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
+      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
     return N1;
   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -4250,6 +4645,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       return BSwap;
   }
 
+  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
+    return Shifts;
+
   return SDValue();
 }
 
@@ -4276,7 +4674,10 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
     if (!N0.getNode()->hasOneUse())
       return SDValue();
     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-    if (!N01C || N01C->getZExtValue() != 0xFF00)
+    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
+    // This is needed for X86.
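// Illustration (standalone sketch, not part of the patch): the
// unfoldExtremeBitClearingToShifts combine above rests on the identity
//   x & (-1 << y) == (x >> y) << y   (and its SRL/SHL mirror image),
// which only pays off when the target says shifts beat materializing the
// mask. A minimal host-side check of the identity on 32-bit unsigned values:
#include <cassert>
#include <cstdint>
int main() {
  const uint32_t X = 0xDEADBEEF;
  for (uint32_t Y = 0; Y < 32; ++Y) {
    // Clearing the low Y bits with a shifted all-ones mask equals shifting
    // those bits out and back in.
    assert((X & (~UINT32_C(0) << Y)) == ((X >> Y) << Y));
    // Mirror case: clearing the high Y bits.
    assert((X & (~UINT32_C(0) >> Y)) == ((X << Y) >> Y));
  }
  return 0;
}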
+    if (!N01C || (N01C->getZExtValue() != 0xFF00 && +                  N01C->getZExtValue() != 0xFFFF))        return SDValue();      N0 = N0.getOperand(0);      LookPassAnd0 = true; @@ -4323,7 +4724,10 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,      if (!N10.getNode()->hasOneUse())        return SDValue();      ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); -    if (!N101C || N101C->getZExtValue() != 0xFF00) +    // Also allow 0xFFFF since the bits will be shifted out. This is needed +    // for X86. +    if (!N101C || (N101C->getZExtValue() != 0xFF00 && +                   N101C->getZExtValue() != 0xFFFF))        return SDValue();      N10 = N10.getOperand(0);      LookPassAnd1 = true; @@ -4394,6 +4798,14 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {      return false;    case 0xFF:       MaskByteOffset = 0; break;    case 0xFF00:     MaskByteOffset = 1; break; +  case 0xFFFF: +    // In case demanded bits didn't clear the bits that will be shifted out. +    // This is needed for X86. +    if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) { +      MaskByteOffset = 1; +      break; +    } +    return false;    case 0xFF0000:   MaskByteOffset = 2; break;    case 0xFF000000: MaskByteOffset = 3; break;    } @@ -4708,7 +5120,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {      return LHS->getAPIntValue().intersects(RHS->getAPIntValue());    };    if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && -      matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) { +      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {      if (SDValue COR = DAG.FoldConstantArithmetic(              ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {        SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1); @@ -4764,7 +5176,8 @@ bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {  // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate  // in direction shift1 by Neg.  The range [0, EltSize) means that we only need  // to consider shift amounts with defined behavior. -static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, +                           SelectionDAG &DAG) {    // If EltSize is a power of 2 then:    //    //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) @@ -4799,9 +5212,13 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {    unsigned MaskLoBits = 0;    if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {      if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { -      if (NegC->getAPIntValue() == EltSize - 1) { +      KnownBits Known; +      DAG.computeKnownBits(Neg.getOperand(0), Known); +      unsigned Bits = Log2_64(EltSize); +      if (NegC->getAPIntValue().getActiveBits() <= Bits && +          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {          Neg = Neg.getOperand(0); -        MaskLoBits = Log2_64(EltSize); +        MaskLoBits = Bits;        }      }    } @@ -4816,10 +5233,16 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {    // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with    // Pos'.  The truncation is redundant for the purpose of the equality. 
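// Illustration (standalone sketch, not part of the patch): the 0xFFFF masks
// newly tolerated by MatchBSwapHWordLow/isBSwapHWordElement above are safe
// because the extra low byte that the wider mask keeps is discarded by the
// 8-bit shift anyway. A host-side check of that claim:
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t X = 0; X <= 0xFFFF; ++X)
    // Masking with 0xFFFF before shifting right by 8 keeps exactly the same
    // bits as the canonical 0xFF00 mask: bits 0-7 fall off either way.
    assert(((X & 0xFFFF) >> 8) == ((X & 0xFF00) >> 8));
  return 0;
}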
-  if (MaskLoBits && Pos.getOpcode() == ISD::AND) -    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) -      if (PosC->getAPIntValue() == EltSize - 1) +  if (MaskLoBits && Pos.getOpcode() == ISD::AND) { +    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) { +      KnownBits Known; +      DAG.computeKnownBits(Pos.getOperand(0), Known); +      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits && +          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >= +           MaskLoBits))          Pos = Pos.getOperand(0); +    } +  }    // The condition we need is now:    // @@ -4875,7 +5298,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,    //          (srl x, (*ext y))) ->    //   (rotr x, y) or (rotl x, (sub 32, y))    EVT VT = Shifted.getValueType(); -  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) { +  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {      bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);      return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,                         HasPos ? Pos : Neg).getNode(); @@ -4893,8 +5316,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {    if (!TLI.isTypeLegal(VT)) return nullptr;    // The target must have at least one rotate flavor. -  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); -  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); +  bool HasROTL = hasOperation(ISD::ROTL, VT); +  bool HasROTR = hasOperation(ISD::ROTR, VT);    if (!HasROTL && !HasROTR) return nullptr;    // Check for truncated rotate. @@ -4943,7 +5366,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {                                          ConstantSDNode *RHS) {      return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;    }; -  if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { +  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {      SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,                                LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); @@ -5200,7 +5623,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {    Optional<BaseIndexOffset> Base;    SDValue Chain; -  SmallSet<LoadSDNode *, 8> Loads; +  SmallPtrSet<LoadSDNode *, 8> Loads;    Optional<ByteProvider> FirstByteProvider;    int64_t FirstOffset = INT64_MAX; @@ -5299,6 +5722,88 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {    return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;  } +// If the target has andn, bsl, or a similar bit-select instruction, +// we want to unfold masked merge, with canonical pattern of: +//   |        A  |  |B| +//   ((x ^ y) & m) ^ y +//    |  D  | +// Into: +//   (x & m) | (y & ~m) +// If y is a constant, and the 'andn' does not work with immediates, +// we unfold into a different pattern: +//   ~(~x & m) & (m | y) +// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at +//       the very least that breaks andnpd / andnps patterns, and because those +//       patterns are simplified in IR and shouldn't be created in the DAG +SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { +  assert(N->getOpcode() == ISD::XOR); + +  // Don't touch 'not' (i.e. where y = -1). 
+  if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1))) +    return SDValue(); + +  EVT VT = N->getValueType(0); + +  // There are 3 commutable operators in the pattern, +  // so we have to deal with 8 possible variants of the basic pattern. +  SDValue X, Y, M; +  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) { +    if (And.getOpcode() != ISD::AND || !And.hasOneUse()) +      return false; +    SDValue Xor = And.getOperand(XorIdx); +    if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse()) +      return false; +    SDValue Xor0 = Xor.getOperand(0); +    SDValue Xor1 = Xor.getOperand(1); +    // Don't touch 'not' (i.e. where y = -1). +    if (isAllOnesConstantOrAllOnesSplatConstant(Xor1)) +      return false; +    if (Other == Xor0) +      std::swap(Xor0, Xor1); +    if (Other != Xor1) +      return false; +    X = Xor0; +    Y = Xor1; +    M = And.getOperand(XorIdx ? 0 : 1); +    return true; +  }; + +  SDValue N0 = N->getOperand(0); +  SDValue N1 = N->getOperand(1); +  if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) && +      !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0)) +    return SDValue(); + +  // Don't do anything if the mask is constant. This should not be reachable. +  // InstCombine should have already unfolded this pattern, and DAGCombiner +  // probably shouldn't produce it, too. +  if (isa<ConstantSDNode>(M.getNode())) +    return SDValue(); + +  // We can transform if the target has AndNot +  if (!TLI.hasAndNot(M)) +    return SDValue(); + +  SDLoc DL(N); + +  // If Y is a constant, check that 'andn' works with immediates. +  if (!TLI.hasAndNot(Y)) { +    assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable."); +    // If not, we need to do a bit more work to make sure andn is still used. 
+    SDValue NotX = DAG.getNOT(DL, X, VT); +    SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M); +    SDValue NotLHS = DAG.getNOT(DL, LHS, VT); +    SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y); +    return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS); +  } + +  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M); +  SDValue NotM = DAG.getNOT(DL, M, VT); +  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); + +  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); +} +  SDValue DAGCombiner::visitXOR(SDNode *N) {    SDValue N0 = N->getOperand(0);    SDValue N1 = N->getOperand(1); @@ -5378,7 +5883,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {    }    // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc -  if (isOneConstant(N1) && VT == MVT::i1 && +  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&        (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {      SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);      if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { @@ -5390,7 +5895,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {      }    }    // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants -  if (isAllOnesConstant(N1) && +  if (isAllOnesConstant(N1) && N0.hasOneUse() &&        (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {      SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);      if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { @@ -5411,13 +5916,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {    }    // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) -  unsigned OpSizeInBits = VT.getScalarSizeInBits(); -  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 && -      N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) && -      TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { -    if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) -      if (C->getAPIntValue() == (OpSizeInBits - 1)) -        return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0)); +  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { +    SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1; +    SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1; +    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) { +      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1); +      SDValue S0 = S.getOperand(0); +      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) { +        unsigned OpSizeInBits = VT.getScalarSizeInBits(); +        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1))) +          if (C->getAPIntValue() == (OpSizeInBits - 1)) +            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); +      } +    }    }    // fold (xor x, x) -> 0 @@ -5454,6 +5965,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {      if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))        return Tmp; +  // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable +  if (SDValue MM = unfoldMaskedMerge(N)) +    return MM; +    // Simplify the expression using non-local knowledge.    
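// Illustration (standalone sketch, not part of the patch): both
// unfoldMaskedMerge rewrites above are pure boolean identities, so they can
// be checked exhaustively on a narrow type. Assuming nothing about the DAG:
#include <cassert>
#include <cstdint>
int main() {
  for (unsigned XI = 0; XI < 256; ++XI)
    for (unsigned YI = 0; YI < 256; ++YI)
      for (unsigned MI = 0; MI < 256; ++MI) {
        const uint8_t X = XI, Y = YI, M = MI;
        const uint8_t Merge = ((X ^ Y) & M) ^ Y;        // canonical masked merge
        const uint8_t Unfold = (X & M) | (Y & (uint8_t)~M);     // generic unfold
        const uint8_t ConstY =
            (uint8_t)(~((uint8_t)~X & M) & (M | Y));    // immediate-safe variant
        assert(Merge == Unfold && Merge == ConstY);
      }
  return 0;
}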
if (SimplifyDemandedBits(SDValue(N, 0)))      return SDValue(N, 0); @@ -5656,7 +6171,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {    auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {      return Val->getAPIntValue().uge(OpSizeInBits);    }; -  if (matchUnaryPredicate(N1, MatchShiftTooBig)) +  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))      return DAG.getUNDEF(VT);    // fold (shl x, 0) -> x    if (N1C && N1C->isNullValue()) @@ -5691,7 +6206,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {        zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);        return (c1 + c2).uge(OpSizeInBits);      }; -    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) +    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))        return DAG.getConstant(0, SDLoc(N), VT);      auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, @@ -5701,7 +6216,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {        zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);        return (c1 + c2).ult(OpSizeInBits);      }; -    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { +    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {        SDLoc DL(N);        EVT ShiftVT = N1.getValueType();        SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); @@ -5877,7 +6392,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {    auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {      return Val->getAPIntValue().uge(OpSizeInBits);    }; -  if (matchUnaryPredicate(N1, MatchShiftTooBig)) +  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))      return DAG.getUNDEF(VT);    // fold (sra x, 0) -> x    if (N1C && N1C->isNullValue()) @@ -5912,7 +6427,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {        zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);        return (c1 + c2).uge(OpSizeInBits);      }; -    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) +    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))        return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),                           DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT)); @@ -5923,7 +6438,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {        zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);        return (c1 + c2).ult(OpSizeInBits);      }; -    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { +    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {        SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));        return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);      } @@ -6041,7 +6556,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {    auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {      return Val->getAPIntValue().uge(OpSizeInBits);    }; -  if (matchUnaryPredicate(N1, MatchShiftTooBig)) +  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))      return DAG.getUNDEF(VT);    // fold (srl x, 0) -> x    if (N1C && N1C->isNullValue()) @@ -6064,7 +6579,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {        zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);        return (c1 + c2).uge(OpSizeInBits);      }; -    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) +    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))        return DAG.getConstant(0, SDLoc(N), VT);      auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, @@ -6074,7 +6589,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {        zeroExtendToMatch(c1, c2, 1 /* 
Overflow Bit */);        return (c1 + c2).ult(OpSizeInBits);      }; -    if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { +    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {        SDLoc DL(N);        EVT ShiftVT = N1.getValueType();        SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); @@ -6285,6 +6800,13 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {    // fold (ctlz c1) -> c2    if (DAG.isConstantIntBuildVectorOrConstantInt(N0))      return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); + +  // If the value is known never to be zero, switch to the undef version. +  if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) { +    if (DAG.isKnownNeverZero(N0)) +      return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); +  } +    return SDValue();  } @@ -6305,6 +6827,13 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {    // fold (cttz c1) -> c2    if (DAG.isConstantIntBuildVectorOrConstantInt(N0))      return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); + +  // If the value is known never to be zero, switch to the undef version. +  if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) { +    if (DAG.isKnownNeverZero(N0)) +      return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); +  } +    return SDValue();  } @@ -6328,7 +6857,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {    return SDValue();  } -/// \brief Generate Min/Max node +/// Generate Min/Max node  static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,                                     SDValue RHS, SDValue True, SDValue False,                                     ISD::CondCode CC, const TargetLowering &TLI, @@ -6443,9 +6972,9 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {    // in another basic block or it could require searching a complicated    // expression.    
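// Illustration (standalone sketch, not part of the patch): the CTLZ/CTTZ
// combines above may only switch to the _ZERO_UNDEF flavors when the operand
// is provably nonzero, since those flavors leave a zero input undefined --
// the same contract as __builtin_clz/__builtin_ctz in GCC and Clang. The
// guard, sketched at the C++ level:
#include <cstdint>
int ctlz32(uint32_t X) {
  if (X == 0)
    return 32;              // ISD::CTLZ semantics: defined result for zero.
  return __builtin_clz(X);  // CTLZ_ZERO_UNDEF analogue: undefined for zero.
}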
if (CondVT.isInteger() && -      TLI.getBooleanContents(false, true) == +      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==            TargetLowering::ZeroOrOneBooleanContent && -      TLI.getBooleanContents(false, false) == +      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==            TargetLowering::ZeroOrOneBooleanContent &&        C1->isNullValue() && C2->isOne()) {      SDValue NotCond = @@ -6574,15 +7103,10 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {      }    } -  // select (xor Cond, 1), X, Y -> select Cond, Y, X    if (VT0 == MVT::i1) { -    if (N0->getOpcode() == ISD::XOR) { -      if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) { -        SDValue Cond0 = N0->getOperand(0); -        if (C->isOne()) -          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1); -      } -    } +    // select (not Cond), N1, N2 -> select Cond, N2, N1 +    if (isBitwiseNot(N0)) +      return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);    }    // fold selects based on a setcc into other things, such as min/max/abs @@ -6726,6 +7250,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {    SDValue DataLo, DataHi;    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); +  SDValue Scale = MSC->getScale();    SDValue BasePtr = MSC->getBasePtr();    SDValue IndexLo, IndexHi;    std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); @@ -6735,11 +7260,11 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),                            Alignment, MSC->getAAInfo(), MSC->getRanges()); -  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; +  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };    Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),                              DL, OpsLo, MMO); -  SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; +  SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale };    Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),                              DL, OpsHi, MMO); @@ -6800,12 +7325,12 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {      Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,                                       MST->isCompressingStore()); +    unsigned HiOffset = LoMemVT.getStoreSize(); -    MMO = DAG.getMachineFunction(). 
-      getMachineMemOperand(MST->getPointerInfo(), -                           MachineMemOperand::MOStore,  HiMemVT.getStoreSize(), -                           SecondHalfAlignment, MST->getAAInfo(), -                           MST->getRanges()); +    MMO = DAG.getMachineFunction().getMachineMemOperand( +        MST->getPointerInfo().getWithOffset(HiOffset), +        MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, +        MST->getAAInfo(), MST->getRanges());      Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,                              MST->isTruncatingStore(), @@ -6859,6 +7384,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {    EVT LoMemVT, HiMemVT;    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); +  SDValue Scale = MGT->getScale();    SDValue BasePtr = MGT->getBasePtr();    SDValue Index = MGT->getIndex();    SDValue IndexLo, IndexHi; @@ -6869,13 +7395,13 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {                            MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),                            Alignment, MGT->getAAInfo(), MGT->getRanges()); -  SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; +  SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale };    Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, -                            MMO); +                           MMO); -  SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; +  SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale };    Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, -                            MMO); +                           MMO);    AddToWorklist(Lo.getNode());    AddToWorklist(Hi.getNode()); @@ -6949,11 +7475,12 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {      Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,                                       MLD->isExpandingLoad()); +    unsigned HiOffset = LoMemVT.getStoreSize(); -    MMO = DAG.getMachineFunction(). -    getMachineMemOperand(MLD->getPointerInfo(), -                         MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(), -                         SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); +    MMO = DAG.getMachineFunction().getMachineMemOperand( +        MLD->getPointerInfo().getWithOffset(HiOffset), +        MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, +        MLD->getAAInfo(), MLD->getRanges());      Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,                             ISD::NON_EXTLOAD, MLD->isExpandingLoad()); @@ -7071,6 +7598,36 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {        AddToWorklist(Add.getNode());        return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);      } + +    // If this select has a condition (setcc) with narrower operands than the +    // select, try to widen the compare to match the select width. +    // TODO: This should be extended to handle any constant. +    // TODO: This could be extended to handle non-loading patterns, but that +    //       requires thorough testing to avoid regressions. 
+    if (isNullConstantOrNullSplatConstant(RHS)) {
+      EVT NarrowVT = LHS.getValueType();
+      EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
+      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
+      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
+      unsigned WideWidth = WideVT.getScalarSizeInBits();
+      bool IsSigned = isSignedIntSetCC(CC);
+      auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+      if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
+          SetCCWidth != 1 && SetCCWidth < WideWidth &&
+          TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
+          TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
+        // Both compare operands can be widened for free. The LHS can use an
+        // extended load, and the RHS is a constant:
+        //   vselect (ext (setcc load(X), C)), N1, N2 -->
+        //   vselect (setcc extload(X), C'), N1, N2
+        auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
+        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
+        EVT WideSetCCVT = getSetCCResultType(WideVT);
+        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
+        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
+      }
+    }
   }
 
   if (SimplifySelectOps(N, N1, N2))
@@ -7142,22 +7699,33 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitSETCC(SDNode *N) {
-  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
-                       cast<CondCodeSDNode>(N->getOperand(2))->get(),
-                       SDLoc(N));
-}
+  // setcc is very commonly used as an argument to brcond. This pattern
+  // also lends itself to numerous combines and, as a result, it is desirable
+  // to keep the argument to a brcond as a setcc as much as possible.
+  bool PreferSetCC =
+      N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
 
-SDValue DAGCombiner::visitSETCCE(SDNode *N) {
-  SDValue LHS = N->getOperand(0);
-  SDValue RHS = N->getOperand(1);
-  SDValue Carry = N->getOperand(2);
-  SDValue Cond = N->getOperand(3);
+  SDValue Combined = SimplifySetCC(
+      N->getValueType(0), N->getOperand(0), N->getOperand(1),
+      cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
 
-  // If Carry is false, fold to a regular SETCC.
-  if (Carry.getOpcode() == ISD::CARRY_FALSE)
-    return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+  if (!Combined)
+    return SDValue();
 
-  return SDValue();
+  // If we prefer to have a setcc, and we don't, we'll try our best to
+  // recreate one using rebuildSetCC.
+  if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
+    SDValue NewSetCC = rebuildSetCC(Combined);
+
+    // We don't have anything interesting to combine to.
+    if (NewSetCC.getNode() == N)
+      return SDValue();
+
+    if (NewSetCC)
+      return NewSetCC;
+  }
+
+  return Combined;
 }
 
 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
@@ -7237,12 +7805,12 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
 // transformation. Returns true if extensions are possible and the above
 // mentioned transformation is profitable.
-static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, +static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,                                      unsigned ExtOpc,                                      SmallVectorImpl<SDNode *> &ExtendNodes,                                      const TargetLowering &TLI) {    bool HasCopyToRegUses = false; -  bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); +  bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());    for (SDNode::use_iterator UI = N0.getNode()->use_begin(),                              UE = N0.getNode()->use_end();         UI != UE; ++UI) { @@ -7298,16 +7866,16 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,  }  void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, -                                  SDValue Trunc, SDValue ExtLoad, -                                  const SDLoc &DL, ISD::NodeType ExtType) { +                                  SDValue OrigLoad, SDValue ExtLoad, +                                  ISD::NodeType ExtType) {    // Extend SetCC uses if necessary. -  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { -    SDNode *SetCC = SetCCs[i]; +  SDLoc DL(ExtLoad); +  for (SDNode *SetCC : SetCCs) {      SmallVector<SDValue, 4> Ops;      for (unsigned j = 0; j != 2; ++j) {        SDValue SOp = SetCC->getOperand(j); -      if (SOp == Trunc) +      if (SOp == OrigLoad)          Ops.push_back(ExtLoad);        else          Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); @@ -7356,7 +7924,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {      return SDValue();    SmallVector<SDNode *, 4> SetCCs; -  if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) +  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))      return SDValue();    ISD::LoadExtType ExtType = @@ -7387,7 +7955,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {      const unsigned Align = MinAlign(LN0->getAlignment(), Offset);      SDValue SplitLoad = DAG.getExtLoad( -        ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, +        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,          LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,          LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); @@ -7410,12 +7978,82 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {    // with a truncate of the concatenated sextloaded vectors.    SDValue Trunc =        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); +  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());    CombineTo(N0.getNode(), Trunc, NewChain); -  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL, -                  (ISD::NodeType)N->getOpcode());    return SDValue(N, 0); // Return N so it doesn't get rechecked!  
} +// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> +//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) +SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { +  assert(N->getOpcode() == ISD::ZERO_EXTEND); +  EVT VT = N->getValueType(0); + +  // and/or/xor +  SDValue N0 = N->getOperand(0); +  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || +        N0.getOpcode() == ISD::XOR) || +      N0.getOperand(1).getOpcode() != ISD::Constant || +      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT))) +    return SDValue(); + +  // shl/shr +  SDValue N1 = N0->getOperand(0); +  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) || +      N1.getOperand(1).getOpcode() != ISD::Constant || +      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT))) +    return SDValue(); + +  // load +  if (!isa<LoadSDNode>(N1.getOperand(0))) +    return SDValue(); +  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0)); +  EVT MemVT = Load->getMemoryVT(); +  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) || +      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed()) +    return SDValue(); + + +  // If the shift op is SHL, the logic op must be AND, otherwise the result +  // will be wrong. +  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND) +    return SDValue(); + +  if (!N0.hasOneUse() || !N1.hasOneUse()) +    return SDValue(); + +  SmallVector<SDNode*, 4> SetCCs; +  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0), +                               ISD::ZERO_EXTEND, SetCCs, TLI)) +    return SDValue(); + +  // Actually do the transformation. +  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT, +                                   Load->getChain(), Load->getBasePtr(), +                                   Load->getMemoryVT(), Load->getMemOperand()); + +  SDLoc DL1(N1); +  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad, +                              N1.getOperand(1)); + +  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); +  Mask = Mask.zext(VT.getSizeInBits()); +  SDLoc DL0(N0); +  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift, +                            DAG.getConstant(Mask, DL0, VT)); + +  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND); +  CombineTo(N, And); +  if (SDValue(Load, 0).hasOneUse()) { +    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1)); +  } else { +    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load), +                                Load->getValueType(0), ExtLoad); +    CombineTo(Load, Trunc, ExtLoad.getValue(1)); +  } +  return SDValue(N,0); // Return N so it doesn't get rechecked! 
+} +  /// If we're narrowing or widening the result of a vector select and the final  /// size is the same size as a setcc (compare) feeding the select, then try to  /// apply the cast operation to the select's operands because matching vector @@ -7461,6 +8099,106 @@ SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {    return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);  } +// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) +// fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) +static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, +                                     const TargetLowering &TLI, EVT VT, +                                     bool LegalOperations, SDNode *N, +                                     SDValue N0, ISD::LoadExtType ExtLoadType) { +  SDNode *N0Node = N0.getNode(); +  bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node) +                                                   : ISD::isZEXTLoad(N0Node); +  if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) || +      !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse()) +    return {}; + +  LoadSDNode *LN0 = cast<LoadSDNode>(N0); +  EVT MemVT = LN0->getMemoryVT(); +  if ((LegalOperations || LN0->isVolatile()) && +      !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) +    return {}; + +  SDValue ExtLoad = +      DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), +                     LN0->getBasePtr(), MemVT, LN0->getMemOperand()); +  Combiner.CombineTo(N, ExtLoad); +  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); +  return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x))) +// Only generate vector extloads when 1) they're legal, and 2) they are +// deemed desirable by the target. +static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, +                                  const TargetLowering &TLI, EVT VT, +                                  bool LegalOperations, SDNode *N, SDValue N0, +                                  ISD::LoadExtType ExtLoadType, +                                  ISD::NodeType ExtOpc) { +  if (!ISD::isNON_EXTLoad(N0.getNode()) || +      !ISD::isUNINDEXEDLoad(N0.getNode()) || +      ((LegalOperations || VT.isVector() || +        cast<LoadSDNode>(N0)->isVolatile()) && +       !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) +    return {}; + +  bool DoXform = true; +  SmallVector<SDNode *, 4> SetCCs; +  if (!N0.hasOneUse()) +    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI); +  if (VT.isVector()) +    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); +  if (!DoXform) +    return {}; + +  LoadSDNode *LN0 = cast<LoadSDNode>(N0); +  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), +                                   LN0->getBasePtr(), N0.getValueType(), +                                   LN0->getMemOperand()); +  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc); +  // If the load value is used only by N, replace it via CombineTo N. 
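// Illustration (standalone sketch, not part of the patch): the
// CombineZExtLogicopShiftLoad fold above moves a zext through a shift plus
// logic op applied to a loaded value; for zero-extended (unsigned) arithmetic
// the reordering is an identity. A host-side check widening uint8_t to
// uint32_t:
#include <cassert>
#include <cstdint>
int main() {
  for (unsigned V = 0; V < 256; ++V) {
    const uint8_t Loaded = (uint8_t)V;
    // zext(and(srl(load x, 3), 0x1F)) ...
    const uint32_t ExtLast = (uint32_t)(uint8_t)((Loaded >> 3) & 0x1F);
    // ... == and(srl(zextload x, 3), zext(0x1F))
    const uint32_t ExtFirst = ((uint32_t)Loaded >> 3) & 0x1F;
    assert(ExtLast == ExtFirst);
  }
  return 0;
}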
+  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); +  Combiner.CombineTo(N, ExtLoad); +  if (NoReplaceTrunc) { +    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); +  } else { +    SDValue Trunc = +        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); +    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1)); +  } +  return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, +                                       bool LegalOperations) { +  assert((N->getOpcode() == ISD::SIGN_EXTEND || +          N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext"); + +  SDValue SetCC = N->getOperand(0); +  if (LegalOperations || SetCC.getOpcode() != ISD::SETCC || +      !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1) +    return SDValue(); + +  SDValue X = SetCC.getOperand(0); +  SDValue Ones = SetCC.getOperand(1); +  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); +  EVT VT = N->getValueType(0); +  EVT XVT = X.getValueType(); +  // setge X, C is canonicalized to setgt, so we do not need to match that +  // pattern. The setlt sibling is folded in SimplifySelectCC() because it does +  // not require the 'not' op. +  if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) { +    // Invert and smear/shift the sign bit: +    // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) +    // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) +    SDLoc DL(N); +    SDValue NotX = DAG.getNOT(DL, X, VT); +    SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); +    auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; +    return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); +  } +  return SDValue(); +} +  SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {    SDValue N0 = N->getOperand(0);    EVT VT = N->getValueType(0); @@ -7525,62 +8263,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {      }    } -  // fold (sext (load x)) -> (sext (truncate (sextload x))) -  // Only generate vector extloads when 1) they're legal, and 2) they are -  // deemed desirable by the target. -  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && -      ((!LegalOperations && !VT.isVector() && -        !cast<LoadSDNode>(N0)->isVolatile()) || -       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { -    bool DoXform = true; -    SmallVector<SDNode*, 4> SetCCs; -    if (!N0.hasOneUse()) -      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); -    if (VT.isVector()) -      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); -    if (DoXform) { -      LoadSDNode *LN0 = cast<LoadSDNode>(N0); -      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), -                                       LN0->getBasePtr(), N0.getValueType(), -                                       LN0->getMemOperand()); -      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), -                                  N0.getValueType(), ExtLoad); -      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); -      // If the load value is used only by N, replace it via CombineTo N. 
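// Illustration (standalone sketch, not part of the patch): the
// foldExtendedSignBitTest helper above rewrites [s|z]ext(setgt X, -1) as a
// shift of ~X. The scalar identity, checked with 32-bit values (assuming the
// usual arithmetic right shift for the signed case):
#include <cassert>
#include <cstdint>
int main() {
  for (int64_t I = -100000; I <= 100000; ++I) {
    const int32_t X = (int32_t)I;
    // sext i1 (setgt X, -1) --> sra (not X), 31
    assert(((X > -1) ? -1 : 0) == ((int32_t)~X >> 31));
    // zext i1 (setgt X, -1) --> srl (not X), 31
    assert(((X > -1) ? 1u : 0u) == ((uint32_t)~X >> 31));
  }
  return 0;
}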
-      bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); -      CombineTo(N, ExtLoad); -      if (NoReplaceTrunc) -        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); -      else -        CombineTo(LN0, Trunc, ExtLoad.getValue(1)); -      return SDValue(N, 0); -    } -  } +  // Try to simplify (sext (load x)). +  if (SDValue foldedExt = +          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, +                             ISD::SEXTLOAD, ISD::SIGN_EXTEND)) +    return foldedExt;    // fold (sext (load x)) to multiple smaller sextloads.    // Only on illegal but splittable vectors.    if (SDValue ExtLoad = CombineExtLoad(N))      return ExtLoad; -  // fold (sext (sextload x)) -> (sext (truncate (sextload x))) -  // fold (sext ( extload x)) -> (sext (truncate (sextload x))) -  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && -      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { -    LoadSDNode *LN0 = cast<LoadSDNode>(N0); -    EVT MemVT = LN0->getMemoryVT(); -    if ((!LegalOperations && !LN0->isVolatile()) || -        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) { -      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), -                                       LN0->getBasePtr(), MemVT, -                                       LN0->getMemOperand()); -      CombineTo(N, ExtLoad); -      CombineTo(N0.getNode(), -                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), -                            N0.getValueType(), ExtLoad), -                ExtLoad.getValue(1)); -      return SDValue(N, 0);   // Return N so it doesn't get rechecked! -    } -  } +  // Try to simplify (sext (sextload x)). +  if (SDValue foldedExt = tryToFoldExtOfExtload( +          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD)) +    return foldedExt;    // fold (sext (and/or/xor (load x), cst)) ->    //      (and/or/xor (sextload x), (sext cst)) @@ -7588,30 +8285,26 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {         N0.getOpcode() == ISD::XOR) &&        isa<LoadSDNode>(N0.getOperand(0)) &&        N0.getOperand(1).getOpcode() == ISD::Constant && -      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&        (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { -    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); -    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { -      bool DoXform = true; +    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0)); +    EVT MemVT = LN00->getMemoryVT(); +    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) && +      LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {        SmallVector<SDNode*, 4> SetCCs; -      if (!N0.hasOneUse()) -        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, -                                          SetCCs, TLI); +      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0), +                                             ISD::SIGN_EXTEND, SetCCs, TLI);        if (DoXform) { -        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, -                                         LN0->getChain(), LN0->getBasePtr(), -                                         LN0->getMemoryVT(), -                                         LN0->getMemOperand()); +        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT, +                                         LN00->getChain(), LN00->getBasePtr(), +                                         
LN00->getMemoryVT(), +                                         LN00->getMemOperand());          APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();          Mask = Mask.sext(VT.getSizeInBits());          SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,                                    ExtLoad, DAG.getConstant(Mask, DL, VT)); -        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, -                                    SDLoc(N0.getOperand(0)), -                                    N0.getOperand(0).getValueType(), ExtLoad); -        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); +        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);          bool NoReplaceTruncAnd = !N0.hasOneUse(); -        bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); +        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();          CombineTo(N, And);          // If N0 has multiple uses, change other uses as well.          if (NoReplaceTruncAnd) { @@ -7619,15 +8312,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {                DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);            CombineTo(N0.getNode(), TruncAnd);          } -        if (NoReplaceTrunc) -          DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); -        else -          CombineTo(LN0, Trunc, ExtLoad.getValue(1)); +        if (NoReplaceTrunc) { +          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1)); +        } else { +          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00), +                                      LN00->getValueType(0), ExtLoad); +          CombineTo(LN00, Trunc, ExtLoad.getValue(1)); +        }          return SDValue(N,0); // Return N so it doesn't get rechecked!        }      }    } +  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) +    return V; +    if (N0.getOpcode() == ISD::SETCC) {      SDValue N00 = N0.getOperand(0);      SDValue N01 = N0.getOperand(1); @@ -7674,8 +8373,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {      // If the type of the setcc is larger (say, i8) then the value of the high      // bit depends on getBooleanContents(), so ask TLI for a real "true" value      // of the appropriate width. -    SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT) -                                           : TLI.getConstTrueVal(DAG, VT, DL); +    SDValue ExtTrueVal = (SetCCWidth == 1) +                             ? DAG.getAllOnesConstant(DL, VT) +                             : DAG.getBoolConstant(true, DL, VT, N00VT);      SDValue Zero = DAG.getConstant(0, DL, VT);      if (SDValue SCC =              SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) @@ -7792,13 +8492,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {      // Try to mask before the extension to avoid having to generate a larger mask,      // possibly over several sub-vectors. -    if (SrcVT.bitsLT(VT)) { +    if (SrcVT.bitsLT(VT) && VT.isVector()) {        if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&                                 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {          SDValue Op = N0.getOperand(0);          Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());          AddToWorklist(Op.getNode()); -        return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); +        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT); +        // Transfer the debug info; the new node is equivalent to N0. 
+        DAG.transferDbgValues(N0, ZExtOrTrunc); +        return ZExtOrTrunc;        }      } @@ -7830,39 +8533,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {                         X, DAG.getConstant(Mask, DL, VT));    } -  // fold (zext (load x)) -> (zext (truncate (zextload x))) -  // Only generate vector extloads when 1) they're legal, and 2) they are -  // deemed desirable by the target. -  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && -      ((!LegalOperations && !VT.isVector() && -        !cast<LoadSDNode>(N0)->isVolatile()) || -       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { -    bool DoXform = true; -    SmallVector<SDNode*, 4> SetCCs; -    if (!N0.hasOneUse()) -      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); -    if (VT.isVector()) -      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); -    if (DoXform) { -      LoadSDNode *LN0 = cast<LoadSDNode>(N0); -      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, -                                       LN0->getChain(), -                                       LN0->getBasePtr(), N0.getValueType(), -                                       LN0->getMemOperand()); - -      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), -                                  N0.getValueType(), ExtLoad); -      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); -      // If the load value is used only by N, replace it via CombineTo N. -      bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); -      CombineTo(N, ExtLoad); -      if (NoReplaceTrunc) -        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); -      else -        CombineTo(LN0, Trunc, ExtLoad.getValue(1)); -      return SDValue(N, 0); // Return N so it doesn't get rechecked! -    } -  } +  // Try to simplify (zext (load x)). +  if (SDValue foldedExt = +          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, +                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) +    return foldedExt;    // fold (zext (load x)) to multiple smaller zextloads.    // Only on illegal but splittable vectors. 
@@ -7877,10 +8552,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {         N0.getOpcode() == ISD::XOR) &&        isa<LoadSDNode>(N0.getOperand(0)) &&        N0.getOperand(1).getOpcode() == ISD::Constant && -      TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&        (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { -    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); -    if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { +    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0)); +    EVT MemVT = LN00->getMemoryVT(); +    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) && +        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {        bool DoXform = true;        SmallVector<SDNode*, 4> SetCCs;        if (!N0.hasOneUse()) { @@ -7888,29 +8564,26 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {            auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));            EVT LoadResultTy = AndC->getValueType(0);            EVT ExtVT; -          if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT)) +          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))              DoXform = false;          } -        if (DoXform) -          DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), -                                            ISD::ZERO_EXTEND, SetCCs, TLI);        } +      if (DoXform) +        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0), +                                          ISD::ZERO_EXTEND, SetCCs, TLI);        if (DoXform) { -        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, -                                         LN0->getChain(), LN0->getBasePtr(), -                                         LN0->getMemoryVT(), -                                         LN0->getMemOperand()); +        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT, +                                         LN00->getChain(), LN00->getBasePtr(), +                                         LN00->getMemoryVT(), +                                         LN00->getMemOperand());          APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();          Mask = Mask.zext(VT.getSizeInBits());          SDLoc DL(N);          SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,                                    ExtLoad, DAG.getConstant(Mask, DL, VT)); -        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, -                                    SDLoc(N0.getOperand(0)), -                                    N0.getOperand(0).getValueType(), ExtLoad); -        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); +        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);          bool NoReplaceTruncAnd = !N0.hasOneUse(); -        bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); +        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();          CombineTo(N, And);          // If N0 has multiple uses, change other uses as well.          
if (NoReplaceTruncAnd) { @@ -7918,35 +8591,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {                DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);            CombineTo(N0.getNode(), TruncAnd);          } -        if (NoReplaceTrunc) -          DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); -        else -          CombineTo(LN0, Trunc, ExtLoad.getValue(1)); +        if (NoReplaceTrunc) { +          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1)); +        } else { +          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00), +                                      LN00->getValueType(0), ExtLoad); +          CombineTo(LN00, Trunc, ExtLoad.getValue(1)); +        }          return SDValue(N,0); // Return N so it doesn't get rechecked!        }      }    } -  // fold (zext (zextload x)) -> (zext (truncate (zextload x))) -  // fold (zext ( extload x)) -> (zext (truncate (zextload x))) -  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && -      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { -    LoadSDNode *LN0 = cast<LoadSDNode>(N0); -    EVT MemVT = LN0->getMemoryVT(); -    if ((!LegalOperations && !LN0->isVolatile()) || -        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) { -      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, -                                       LN0->getChain(), -                                       LN0->getBasePtr(), MemVT, -                                       LN0->getMemOperand()); -      CombineTo(N, ExtLoad); -      CombineTo(N0.getNode(), -                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), -                            ExtLoad), -                ExtLoad.getValue(1)); -      return SDValue(N, 0);   // Return N so it doesn't get rechecked! -    } -  } +  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> +  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) +  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N)) +    return ZExtLoad; + +  // Try to simplify (zext (zextload x)). +  if (SDValue foldedExt = tryToFoldExtOfExtload( +          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD)) +    return foldedExt; + +  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) +    return V;    if (N0.getOpcode() == ISD::SETCC) {      // Only do this before legalize for now. @@ -8084,24 +8752,25 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {      bool DoXform = true;      SmallVector<SDNode*, 4> SetCCs;      if (!N0.hasOneUse()) -      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); +      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, +                                        TLI);      if (DoXform) {        LoadSDNode *LN0 = cast<LoadSDNode>(N0);        SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,                                         LN0->getChain(),                                         LN0->getBasePtr(), N0.getValueType(),                                         LN0->getMemOperand()); -      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), -                                  N0.getValueType(), ExtLoad); -      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), -                      ISD::ANY_EXTEND); +      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);        // If the load value is used only by N, replace it via CombineTo N.        
bool NoReplaceTrunc = N0.hasOneUse();        CombineTo(N, ExtLoad); -      if (NoReplaceTrunc) +      if (NoReplaceTrunc) {          DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); -      else +      } else { +        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), +                                    N0.getValueType(), ExtLoad);          CombineTo(LN0, Trunc, ExtLoad.getValue(1)); +      }        return SDValue(N, 0); // Return N so it doesn't get rechecked!      }    } @@ -8109,9 +8778,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {    // fold (aext (zextload x)) -> (aext (truncate (zextload x)))    // fold (aext (sextload x)) -> (aext (truncate (sextload x)))    // fold (aext ( extload x)) -> (aext (truncate (extload  x))) -  if (N0.getOpcode() == ISD::LOAD && -      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && -      N0.hasOneUse()) { +  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) && +      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {      LoadSDNode *LN0 = cast<LoadSDNode>(N0);      ISD::LoadExtType ExtType = LN0->getExtensionType();      EVT MemVT = LN0->getMemoryVT(); @@ -8120,10 +8788,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {                                         VT, LN0->getChain(), LN0->getBasePtr(),                                         MemVT, LN0->getMemOperand());        CombineTo(N, ExtLoad); -      CombineTo(N0.getNode(), -                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), -                            N0.getValueType(), ExtLoad), -                ExtLoad.getValue(1)); +      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));        return SDValue(N, 0);   // Return N so it doesn't get rechecked!      }    } @@ -8263,8 +8928,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {    unsigned ShAmt = 0;    if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { -    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { -      ShAmt = N01->getZExtValue(); +    SDValue SRL = N0; +    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) { +      ShAmt = ConstShift->getZExtValue();        unsigned EVTBits = ExtVT.getSizeInBits();        // Is the shift amount a multiple of size of VT?        if ((ShAmt & (EVTBits-1)) == 0) { @@ -8277,17 +8943,36 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {        // At this point, we must have a load or else we can't do the transform.        if (!isa<LoadSDNode>(N0)) return SDValue(); +      auto *LN0 = cast<LoadSDNode>(N0); +        // Because a SRL must be assumed to *need* to zero-extend the high bits        // (as opposed to anyext the high bits), we can't combine the zextload        // lowering of SRL and an sextload. -      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD) +      if (LN0->getExtensionType() == ISD::SEXTLOAD)          return SDValue();        // If the shift amount is larger than the input type then we're not        // accessing any of the loaded bytes.  If the load was a zextload/extload        // then the result of the shift+trunc is zero/undef (handled elsewhere). -      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) +      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())          return SDValue(); + +      // If the SRL is only used by a masking AND, we may be able to adjust +      // the ExtVT to make the AND redundant. 
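The ExtVT adjustment the next hunk performs rests on a small bit-level fact: when the only user of srl x, S is an AND with a low-bit mask of W ones, only bits [S, S+W) of the loaded value are observable, so the extension type can shrink to W bits. A standalone check of that window property (plain C++; the shift, mask, and input value are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned S = 4;        // shift amount
      const uint32_t M = 0xFF;     // low-bit mask, W = 8 ones
      const uint32_t X = 0xDEADBEEF;
      const uint32_t R = (X >> S) & M;
      for (unsigned B = 0; B < 32; ++B) {
        if (B >= S && B < S + 8)
          continue;                // bits inside the observable window
        // Flipping any bit outside [S, S+W) never changes the result.
        assert((((X ^ (1u << B)) >> S) & M) == R);
      }
      return 0;
    }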
+      SDNode *Mask = *(SRL->use_begin());
+      if (Mask->getOpcode() == ISD::AND &&
+          isa<ConstantSDNode>(Mask->getOperand(1))) {
+        const APInt &ShiftMask =
+          cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
+        if (ShiftMask.isMask()) {
+          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
+                                           ShiftMask.countTrailingOnes());
+          // If the mask is smaller, recompute the type.
+          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
+              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
+            ExtVT = MaskedVT;
+        }
+      }
     }
   }
@@ -8307,7 +8992,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
     return SDValue();
 
   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-  if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt))
+  if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
     return SDValue();
 
   // For big endian targets, we need to adjust the offset to the pointer to
@@ -8403,7 +9088,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
   }
 
-  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
+  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
@@ -8777,6 +9462,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
   }
 
+  // fold (truncate (extract_subvector(ext x))) ->
+  //      (extract_subvector x)
+  // TODO: This can be generalized to cover cases where the truncate and extract
+  // do not fully cancel each other out.
+  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+    SDValue N00 = N0.getOperand(0);
+    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
+        N00.getOpcode() == ISD::ZERO_EXTEND ||
+        N00.getOpcode() == ISD::ANY_EXTEND) {
+      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
+          VT.getVectorElementType())
+        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
+                           N00.getOperand(0), N0.getOperand(1));
+    }
+  }
+
   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
     return NewVSel;
 
@@ -8897,17 +9598,17 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
   }
 
   // If the input is a constant, let getNode fold it.
-  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
-    // If we can't allow illegal operations, we need to check that this is just
-    // a fp -> int or int -> conversion and that the resulting operation will
-    // be legal.
-    if (!LegalOperations ||
-        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
-         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
-        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
-         TLI.isOperationLegal(ISD::Constant, VT)))
-      return DAG.getBitcast(VT, N0);
-  }
+  // We always need to check that this is just a fp -> int or int -> fp
+  // conversion, otherwise we will get back N, which will confuse the caller
+  // into thinking we used CombineTo. This can block target combines from
+  // running. If we can't allow illegal operations, we need to ensure the
+  // resulting operation will be legal.
+  // TODO: Maybe we should check that the return value isn't N explicitly? +  if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && +       (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) || +      (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && +       (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT)))) +    return DAG.getBitcast(VT, N0);    // (conv (conv x, t1), t2) -> (conv x, t2)    if (N0.getOpcode() == ISD::BITCAST) @@ -9253,7 +9954,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {  static bool isContractable(SDNode *N) {    SDNodeFlags F = N->getFlags(); -  return F.hasAllowContract() || F.hasUnsafeAlgebra(); +  return F.hasAllowContract() || F.hasAllowReassociation();  }  /// Try to perform FMA combining on a given FADD node. @@ -9277,8 +9978,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {    if (!HasFMAD && !HasFMA)      return SDValue(); +  SDNodeFlags Flags = N->getFlags(); +  bool CanFuse = Options.UnsafeFPMath || isContractable(N);    bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || -                              Options.UnsafeFPMath || HasFMAD); +                              CanFuse || HasFMAD);    // If the addition is not contractable, do not combine.    if (!AllowFusionGlobally && !isContractable(N))      return SDValue(); @@ -9308,14 +10011,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {    // fold (fadd (fmul x, y), z) -> (fma x, y, z)    if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {      return DAG.getNode(PreferredFusedOpcode, SL, VT, -                       N0.getOperand(0), N0.getOperand(1), N1); +                       N0.getOperand(0), N0.getOperand(1), N1, Flags);    }    // fold (fadd x, (fmul y, z)) -> (fma y, z, x)    // Note: Commutes FADD operands.    if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {      return DAG.getNode(PreferredFusedOpcode, SL, VT, -                       N1.getOperand(0), N1.getOperand(1), N0); +                       N1.getOperand(0), N1.getOperand(1), N0, Flags);    }    // Look through FP_EXTEND nodes to do more combining. @@ -9329,7 +10032,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {                           DAG.getNode(ISD::FP_EXTEND, SL, VT,                                       N00.getOperand(0)),                           DAG.getNode(ISD::FP_EXTEND, SL, VT, -                                     N00.getOperand(1)), N1); +                                     N00.getOperand(1)), N1, Flags);      }    } @@ -9343,16 +10046,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {                           DAG.getNode(ISD::FP_EXTEND, SL, VT,                                       N10.getOperand(0)),                           DAG.getNode(ISD::FP_EXTEND, SL, VT, -                                     N10.getOperand(1)), N0); +                                     N10.getOperand(1)), N0, Flags);      }    }    // More folding opportunities when target permits.    if (Aggressive) {      // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) -    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF -    // are currently only supported on binary nodes. 
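Background on why contraction has to be gated on flags at all: an fma rounds once where fmul+fadd round twice, so fusing can change the computed value. A standalone C++ demonstration (not from the patch; the values are chosen to make the double rounding visible, and the volatile only stops the compiler from fusing the unfused version itself):

    #include <cassert>
    #include <cmath>

    int main() {
      const double A = 1.0 + std::ldexp(1.0, -27);  // 1 + 2^-27
      const double C = -(1.0 + std::ldexp(1.0, -26));
      volatile double Prod = A * A;                 // forces a separate rounding
      const double Separate = Prod + C;             // two roundings -> 0.0
      const double Fused = std::fma(A, A, C);       // one rounding -> 2^-54
      assert(Separate == 0.0);
      assert(Fused == std::ldexp(1.0, -54));
      return 0;
    }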
-    if (Options.UnsafeFPMath && +    if (CanFuse &&          N0.getOpcode() == PreferredFusedOpcode &&          N0.getOperand(2).getOpcode() == ISD::FMUL &&          N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { @@ -9361,13 +10062,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {                           DAG.getNode(PreferredFusedOpcode, SL, VT,                                       N0.getOperand(2).getOperand(0),                                       N0.getOperand(2).getOperand(1), -                                     N1)); +                                     N1, Flags), Flags);      }      // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) -    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF -    // are currently only supported on binary nodes. -    if (Options.UnsafeFPMath && +    if (CanFuse &&          N1->getOpcode() == PreferredFusedOpcode &&          N1.getOperand(2).getOpcode() == ISD::FMUL &&          N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { @@ -9376,19 +10075,20 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {                           DAG.getNode(PreferredFusedOpcode, SL, VT,                                       N1.getOperand(2).getOperand(0),                                       N1.getOperand(2).getOperand(1), -                                     N0)); +                                     N0, Flags), Flags);      }      // fold (fadd (fma x, y, (fpext (fmul u, v))), z)      //   -> (fma x, y, (fma (fpext u), (fpext v), z))      auto FoldFAddFMAFPExtFMul = [&] ( -      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { +      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z, +      SDNodeFlags Flags) {        return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,                           DAG.getNode(PreferredFusedOpcode, SL, VT,                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V), -                                     Z)); +                                     Z, Flags), Flags);      };      if (N0.getOpcode() == PreferredFusedOpcode) {        SDValue N02 = N0.getOperand(2); @@ -9398,7 +10098,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {              TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {            return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),                                        N020.getOperand(0), N020.getOperand(1), -                                      N1); +                                      N1, Flags);          }        }      } @@ -9409,14 +10109,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {      // operation into two double-precision operations, which might not be      // interesting for all targets, especially GPUs.      
auto FoldFAddFPExtFMAFMul = [&] ( -      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { +      SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z, +      SDNodeFlags Flags) {        return DAG.getNode(PreferredFusedOpcode, SL, VT,                           DAG.getNode(ISD::FP_EXTEND, SL, VT, X),                           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),                           DAG.getNode(PreferredFusedOpcode, SL, VT,                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V), -                                     Z)); +                                     Z, Flags), Flags);      };      if (N0.getOpcode() == ISD::FP_EXTEND) {        SDValue N00 = N0.getOperand(0); @@ -9426,7 +10127,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {              TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {            return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),                                        N002.getOperand(0), N002.getOperand(1), -                                      N1); +                                      N1, Flags);          }        }      } @@ -9441,7 +10142,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {              TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {            return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),                                        N120.getOperand(0), N120.getOperand(1), -                                      N0); +                                      N0, Flags);          }        }      } @@ -9459,7 +10160,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {              TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {            return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),                                        N102.getOperand(0), N102.getOperand(1), -                                      N0); +                                      N0, Flags);          }        }      } @@ -9488,8 +10189,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {    if (!HasFMAD && !HasFMA)      return SDValue(); +  const SDNodeFlags Flags = N->getFlags(); +  bool CanFuse = Options.UnsafeFPMath || isContractable(N);    bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || -                              Options.UnsafeFPMath || HasFMAD); +                              CanFuse || HasFMAD); +    // If the subtraction is not contractable, do not combine.    if (!AllowFusionGlobally && !isContractable(N))      return SDValue(); @@ -9514,16 +10218,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {    if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {      return DAG.getNode(PreferredFusedOpcode, SL, VT,                         N0.getOperand(0), N0.getOperand(1), -                       DAG.getNode(ISD::FNEG, SL, VT, N1)); +                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);    }    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)    // Note: Commutes FSUB operands. 
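For the commuted FSUB fold noted above, negating y is exact in IEEE arithmetic, so fma(-y, z, x) computes x - y*z up to the usual single-rounding difference. A quick standalone check with exactly representable inputs (plain C++, arbitrary values):

    #include <cassert>
    #include <cmath>

    int main() {
      const double X = 3.5, Y = 1.25, Z = -2.75; // exactly representable
      // IEEE negation is exact, so fma(-Y, Z, X) computes X - Y*Z with a
      // single rounding; with these inputs both sides are exact and equal.
      assert(X - Y * Z == std::fma(-Y, Z, X));
      return 0;
    }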
-  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) +  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {      return DAG.getNode(PreferredFusedOpcode, SL, VT,                         DAG.getNode(ISD::FNEG, SL, VT,                                     N1.getOperand(0)), -                       N1.getOperand(1), N0); +                       N1.getOperand(1), N0, Flags); +  }    // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))    if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) && @@ -9532,7 +10237,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {      SDValue N01 = N0.getOperand(0).getOperand(1);      return DAG.getNode(PreferredFusedOpcode, SL, VT,                         DAG.getNode(ISD::FNEG, SL, VT, N00), N01, -                       DAG.getNode(ISD::FNEG, SL, VT, N1)); +                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);    }    // Look through FP_EXTEND nodes to do more combining. @@ -9548,7 +10253,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                       N00.getOperand(0)),                           DAG.getNode(ISD::FP_EXTEND, SL, VT,                                       N00.getOperand(1)), -                         DAG.getNode(ISD::FNEG, SL, VT, N1)); +                         DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);      }    } @@ -9565,7 +10270,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                                   N10.getOperand(0))),                           DAG.getNode(ISD::FP_EXTEND, SL, VT,                                       N10.getOperand(1)), -                         N0); +                         N0, Flags);                }    } @@ -9587,7 +10292,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                                     N000.getOperand(0)),                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,                                                     N000.getOperand(1)), -                                       N1)); +                                       N1, Flags));        }      }    } @@ -9610,7 +10315,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                                     N000.getOperand(0)),                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,                                                     N000.getOperand(1)), -                                       N1)); +                                       N1, Flags));        }      }    } @@ -9619,9 +10324,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {    if (Aggressive) {      // fold (fsub (fma x, y, (fmul u, v)), z)      //   -> (fma x, y (fma u, v, (fneg z))) -    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF -    // are currently only supported on binary nodes. 
-    if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode && +    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&          isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&          N0.getOperand(2)->hasOneUse()) {        return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -9630,14 +10333,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                       N0.getOperand(2).getOperand(0),                                       N0.getOperand(2).getOperand(1),                                       DAG.getNode(ISD::FNEG, SL, VT, -                                                 N1))); +                                                 N1), Flags), Flags);                }      // fold (fsub x, (fma y, z, (fmul u, v)))      //   -> (fma (fneg y), z, (fma (fneg u), v, x)) -    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF -    // are currently only supported on binary nodes. -    if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode && +    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&          isContractableFMUL(N1.getOperand(2))) {        SDValue N20 = N1.getOperand(2).getOperand(0);        SDValue N21 = N1.getOperand(2).getOperand(1); @@ -9647,8 +10348,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                           N1.getOperand(1),                           DAG.getNode(PreferredFusedOpcode, SL, VT,                                       DAG.getNode(ISD::FNEG, SL, VT, N20), - -                                     N21, N0)); +                                     N21, N0, Flags), Flags);            } @@ -9668,7 +10368,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,                                                       N020.getOperand(1)),                                           DAG.getNode(ISD::FNEG, SL, VT, -                                                     N1))); +                                                     N1), Flags), Flags);                        }        }      } @@ -9696,7 +10396,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,                                                       N002.getOperand(1)),                                           DAG.getNode(ISD::FNEG, SL, VT, -                                                     N1))); +                                                     N1), Flags), Flags);                        }        }      } @@ -9719,7 +10419,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                                                 VT, N1200)),                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,                                                     N1201), -                                       N0)); +                                       N0, Flags), Flags);                }      } @@ -9750,7 +10450,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {                                                                 VT, N1020)),                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,                                                     N1021), -                                       N0)); +                                       N0, Flags), Flags);                }      }    } @@ -9766,6 +10466,7 @@ SDValue 
DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {    SDValue N1 = N->getOperand(1);    EVT VT = N->getValueType(0);    SDLoc SL(N); +  const SDNodeFlags Flags = N->getFlags();    assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); @@ -9797,52 +10498,54 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {    // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)    // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y)) -  auto FuseFADD = [&](SDValue X, SDValue Y) { +  auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {      if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {        auto XC1 = isConstOrConstSplatFP(X.getOperand(1));        if (XC1 && XC1->isExactlyValue(+1.0)) -        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); +        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, +                           Y, Flags);        if (XC1 && XC1->isExactlyValue(-1.0))          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, -                           DAG.getNode(ISD::FNEG, SL, VT, Y)); +                           DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);            }      return SDValue();    }; -  if (SDValue FMA = FuseFADD(N0, N1)) +  if (SDValue FMA = FuseFADD(N0, N1, Flags))      return FMA; -  if (SDValue FMA = FuseFADD(N1, N0)) +  if (SDValue FMA = FuseFADD(N1, N0, Flags))      return FMA;    // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)    // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))    // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))    // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y) -  auto FuseFSUB = [&](SDValue X, SDValue Y) { +  auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {      if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {        auto XC0 = isConstOrConstSplatFP(X.getOperand(0));        if (XC0 && XC0->isExactlyValue(+1.0))          return DAG.getNode(PreferredFusedOpcode, SL, VT,                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, -                           Y); +                           Y, Flags);        if (XC0 && XC0->isExactlyValue(-1.0))          return DAG.getNode(PreferredFusedOpcode, SL, VT,                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, -                           DAG.getNode(ISD::FNEG, SL, VT, Y)); +                           DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);              auto XC1 = isConstOrConstSplatFP(X.getOperand(1));        if (XC1 && XC1->isExactlyValue(+1.0))          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, -                           DAG.getNode(ISD::FNEG, SL, VT, Y)); +                           DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);        if (XC1 && XC1->isExactlyValue(-1.0)) -        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); +        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, +                           Y, Flags);      }      return SDValue();    }; -  if (SDValue FMA = FuseFSUB(N0, N1)) +  if (SDValue FMA = FuseFSUB(N0, N1, Flags))      return FMA; -  if (SDValue FMA = FuseFSUB(N1, N0)) +  if (SDValue FMA = FuseFSUB(N1, N0, Flags))      return FMA;    return SDValue(); @@ -9904,35 +10607,42 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {      return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? 
N0 : N1, Add, Flags);    } -  // FIXME: Auto-upgrade the target/function-level option. -  if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { -    // fold (fadd A, 0) -> A -    if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) -      if (N1C->isZero()) -        return N0; +  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1); +  if (N1C && N1C->isZero()) { +    if (N1C->isNegative() || Options.UnsafeFPMath || +        Flags.hasNoSignedZeros()) { +      // fold (fadd A, 0) -> A +      return N0; +    }    } -  // If 'unsafe math' is enabled, fold lots of things. -  if (Options.UnsafeFPMath) { -    // No FP constant should be created after legalization as Instruction -    // Selection pass has a hard time dealing with FP constants. -    bool AllowNewConst = (Level < AfterLegalizeDAG); - -    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) -    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && -        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) -      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), -                         DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, -                                     Flags), -                         Flags); +  // No FP constant should be created after legalization as Instruction +  // Selection pass has a hard time dealing with FP constants. +  bool AllowNewConst = (Level < AfterLegalizeDAG); +  // If 'unsafe math' or nnan is enabled, fold lots of things. +  if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {      // If allowed, fold (fadd (fneg x), x) -> 0.0 -    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) +    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)        return DAG.getConstantFP(0.0, DL, VT);      // If allowed, fold (fadd x, (fneg x)) -> 0.0 -    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) +    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)        return DAG.getConstantFP(0.0, DL, VT); +  } + +  // If 'unsafe math' or reassoc and nsz, fold lots of things. +  // TODO: break out portions of the transformations below for which Unsafe is +  //       considered and which do not require both nsz and reassoc +  if ((Options.UnsafeFPMath || +       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && +      AllowNewConst) { +    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 +    if (N1CFP && N0.getOpcode() == ISD::FADD && +        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { +      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags); +      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags); +    }      // We can fold chains of FADD's of the same value into multiplications.      
// This transform is not safe in general because we are reducing the number @@ -9980,7 +10690,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {          }        } -      if (N0.getOpcode() == ISD::FADD && AllowNewConst) { +      if (N0.getOpcode() == ISD::FADD) {          bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));          // (fadd (fadd x, x), x) -> (fmul x, 3.0)          if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && @@ -9990,7 +10700,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {          }        } -      if (N1.getOpcode() == ISD::FADD && AllowNewConst) { +      if (N1.getOpcode() == ISD::FADD) {          bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));          // (fadd x, (fadd x, x)) -> (fmul x, 3.0)          if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && @@ -10001,8 +10711,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {        }        // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) -      if (AllowNewConst && -          N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && +      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&            N0.getOperand(0) == N0.getOperand(1) &&            N1.getOperand(0) == N1.getOperand(1) &&            N0.getOperand(0) == N1.getOperand(0)) { @@ -10042,15 +10751,23 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {    if (SDValue NewSel = foldBinOpIntoSelect(N))      return NewSel; -  // fold (fsub A, (fneg B)) -> (fadd A, B) -  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) -    return DAG.getNode(ISD::FADD, DL, VT, N0, -                       GetNegatedExpression(N1, DAG, LegalOperations), Flags); +  // (fsub A, 0) -> A +  if (N1CFP && N1CFP->isZero()) { +    if (!N1CFP->isNegative() || Options.UnsafeFPMath || +        Flags.hasNoSignedZeros()) { +      return N0; +    } +  } -  // FIXME: Auto-upgrade the target/function-level option. -  if (Options.NoSignedZerosFPMath  || N->getFlags().hasNoSignedZeros()) { -    // (fsub 0, B) -> -B -    if (N0CFP && N0CFP->isZero()) { +  if (N0 == N1) { +    // (fsub x, x) -> 0.0 +    if (Options.UnsafeFPMath || Flags.hasNoNaNs()) +      return DAG.getConstantFP(0.0f, DL, VT); +  } + +  // (fsub 0, B) -> -B +  if (N0CFP && N0CFP->isZero()) { +    if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {        if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))          return GetNegatedExpression(N1, DAG, LegalOperations);        if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) @@ -10058,16 +10775,13 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {      }    } +  // fold (fsub A, (fneg B)) -> (fadd A, B) +  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) +    return DAG.getNode(ISD::FADD, DL, VT, N0, +                       GetNegatedExpression(N1, DAG, LegalOperations), Flags); +    // If 'unsafe math' is enabled, fold lots of things.    
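The flag preconditions attached in the hunks above are visible in plain IEEE arithmetic: fadd A, +0.0 -> A fails for A == -0.0, which is why adding -0.0 is folded unconditionally while adding +0.0 requires nsz, and fsub x, x -> 0.0 fails for NaN, which is why it requires nnan. A standalone demonstration:

    #include <cassert>
    #include <cmath>
    #include <limits>

    int main() {
      // fadd A, +0.0 -> A is invalid for A == -0.0: the sign is lost.
      const double NegZero = -0.0;
      assert(std::signbit(NegZero));
      assert(!std::signbit(NegZero + 0.0)); // -0.0 + +0.0 == +0.0
      // fadd A, -0.0 -> A holds for every A, so that direction needs no flag.
      // fsub X, X -> 0.0 is invalid for NaN inputs.
      const double QNaN = std::numeric_limits<double>::quiet_NaN();
      assert(std::isnan(QNaN - QNaN));
      return 0;
    }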
if (Options.UnsafeFPMath) { -    // (fsub A, 0) -> A -    if (N1CFP && N1CFP->isZero()) -      return N0; - -    // (fsub x, x) -> 0.0 -    if (N0 == N1) -      return DAG.getConstantFP(0.0f, DL, VT); -      // (fsub x, (fadd x, y)) -> (fneg y)      // (fsub x, (fadd y, x)) -> (fneg y)      if (N1.getOpcode() == ISD::FADD) { @@ -10124,12 +10838,15 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {    if (SDValue NewSel = foldBinOpIntoSelect(N))      return NewSel; -  if (Options.UnsafeFPMath) { +  if (Options.UnsafeFPMath ||  +      (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {      // fold (fmul A, 0) -> 0      if (N1CFP && N1CFP->isZero())        return N1; +  }  -    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) +  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { +    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2      if (N0.getOpcode() == ISD::FMUL) {        // Fold scalars or any vector constants (not just splats).        // This fold is done in general by InstCombine, but extra fmul insts @@ -10153,13 +10870,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {        }      } -    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) -    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs -    // during an early run of DAGCombiner can prevent folding with fmuls -    // inserted during lowering. -    if (N0.getOpcode() == ISD::FADD && -        (N0.getOperand(0) == N0.getOperand(1)) && -        N0.hasOneUse()) { +    // Match a special-case: we convert X * 2.0 into fadd. +    // fmul (fadd X, X), C -> fmul X, 2.0 * C +    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() && +        N0.getOperand(0) == N0.getOperand(1)) {        const SDValue Two = DAG.getConstantFP(2.0, DL, VT);        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); @@ -10253,6 +10967,10 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {    SDLoc DL(N);    const TargetOptions &Options = DAG.getTarget().Options; +  // FMA nodes have flags that propagate to the created nodes. +  const SDNodeFlags Flags = N->getFlags(); +  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N); +    // Constant fold FMA.    if (isa<ConstantFPSDNode>(N0) &&        isa<ConstantFPSDNode>(N1) && @@ -10260,7 +10978,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {      return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);    } -  if (Options.UnsafeFPMath) { +  if (UnsafeFPMath) {      if (N0CFP && N0CFP->isZero())        return N2;      if (N1CFP && N1CFP->isZero()) @@ -10277,12 +10995,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {       !isConstantFPBuildVectorOrConstantFP(N1))      return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); -  // TODO: FMA nodes should have flags that propagate to the created nodes. -  // For now, create a Flags object for use with all unsafe math transforms. 
-  SDNodeFlags Flags; -  Flags.setUnsafeAlgebra(true); - -  if (Options.UnsafeFPMath) { +  if (UnsafeFPMath) {      // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)      if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&          isConstantFPBuildVectorOrConstantFP(N1) && @@ -10328,7 +11041,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {      }    } -  if (Options.UnsafeFPMath) { +  if (UnsafeFPMath) {      // (fma x, c, x) -> (fmul x, (c+1))      if (N1CFP && N0 == N2) {        return DAG.getNode(ISD::FMUL, DL, VT, N0, @@ -10435,7 +11148,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {    if (SDValue NewSel = foldBinOpIntoSelect(N))      return NewSel; -  if (Options.UnsafeFPMath) { +  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {      // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.      if (N1CFP) {        // Compute the reciprocal 1.0 / c2. @@ -10544,17 +11257,16 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {  }  SDValue DAGCombiner::visitFSQRT(SDNode *N) { -  if (!DAG.getTarget().Options.UnsafeFPMath) +  SDNodeFlags Flags = N->getFlags(); +  if (!DAG.getTarget().Options.UnsafeFPMath &&  +      !Flags.hasApproximateFuncs())      return SDValue();    SDValue N0 = N->getOperand(0);    if (TLI.isFsqrtCheap(N0, DAG))      return SDValue(); -  // TODO: FSQRT nodes should have flags that propagate to the created nodes. -  // For now, create a Flags object for use with all unsafe math transforms. -  SDNodeFlags Flags; -  Flags.setUnsafeAlgebra(true); +  // FSQRT nodes have flags that propagate to the created nodes.    return buildSqrtEstimate(N0, Flags);  } @@ -10622,6 +11334,41 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {    return SDValue();  } +static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, +                               const TargetLowering &TLI) { +  // This optimization is guarded by a function attribute because it may produce +  // unexpected results. Ie, programs may be relying on the platform-specific +  // undefined behavior when the float-to-int conversion overflows. +  const Function &F = DAG.getMachineFunction().getFunction(); +  Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow"); +  if (StrictOverflow.getValueAsString().equals("false")) +    return SDValue(); + +  // We only do this if the target has legal ftrunc. Otherwise, we'd likely be +  // replacing casts with a libcall. We also must be allowed to ignore -0.0 +  // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer +  // conversions would return +0.0. +  // FIXME: We should be able to use node-level FMF here. +  // TODO: If strict math, should we use FABS (+ range check for signed cast)? 
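Two facts drive the new foldFPToIntToFP below: fptosi/fptoui round toward zero, so an in-range FP -> int -> FP round trip is exactly ftrunc, and the sign of zero is the one thing the round trip loses, which is why the fold is additionally gated on NoSignedZerosFPMath. A standalone sketch of both facts (plain C++, arbitrary test values):

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    int main() {
      // In range, FP -> int -> FP rounds toward zero, exactly like trunc().
      const double Vals[] = {2.75, -2.75, 0.5, -123.99};
      for (double D : Vals)
        assert(static_cast<double>(static_cast<int64_t>(D)) == std::trunc(D));
      // The round trip loses the sign of zero: trunc(-0.5) is -0.0, but
      // (double)(int64_t)-0.5 is +0.0. Hence the NoSignedZerosFPMath guard.
      assert(std::signbit(std::trunc(-0.5)));
      assert(!std::signbit(static_cast<double>(static_cast<int64_t>(-0.5))));
      return 0;
    }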
+  EVT VT = N->getValueType(0);
+  if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
+      !DAG.getTarget().Options.NoSignedZerosFPMath)
+    return SDValue();
+
+  // fptosi/fptoui round towards zero, so converting from FP to integer and
+  // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
+  SDValue N0 = N->getOperand(0);
+  if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
+      N0.getOperand(0).getValueType() == VT)
+    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+  if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
+      N0.getOperand(0).getValueType() == VT)
+    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
@@ -10673,6 +11420,9 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
     }
   }
 
+  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
+    return FTrunc;
+
   return SDValue();
 }
@@ -10712,6 +11462,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
     }
   }
 
+  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
+    return FTrunc;
+
   return SDValue();
 }
@@ -11118,16 +11871,22 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                        N1.getOperand(0), N1.getOperand(1), N2);
   }
 
-  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
-      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
-       (N1.getOperand(0).hasOneUse() &&
-        N1.getOperand(0).getOpcode() == ISD::SRL))) {
-    SDNode *Trunc = nullptr;
-    if (N1.getOpcode() == ISD::TRUNCATE) {
-      // Look pass the truncate.
-      Trunc = N1.getNode();
-      N1 = N1.getOperand(0);
-    }
+  if (N1.hasOneUse()) {
+    if (SDValue NewN1 = rebuildSetCC(N1))
+      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::rebuildSetCC(SDValue N) {
+  if (N.getOpcode() == ISD::SRL ||
+      (N.getOpcode() == ISD::TRUNCATE &&
+       (N.getOperand(0).hasOneUse() &&
+        N.getOperand(0).getOpcode() == ISD::SRL))) {
+    // Look past the truncate.
+    if (N.getOpcode() == ISD::TRUNCATE)
+      N = N.getOperand(0);
 
     // Match this pattern so that we can generate simpler code:
     //
     //   %a = ...
     //   %b = and i32 %a, 2
     //   %c = srl i32 %b, 1
     //   brcond i32 %c ...
     //
     // into
     //
     //   %a = ...
     //   %b = and i32 %a, 2
     //   %c = setcc eq %b, 0
     //   brcond %c ...
     //
     // This applies only when the AND constant value has one bit set and the
     // SRL constant is equal to the log2 of the AND constant. The back-end is
     // smart enough to convert the result into a TEST/JMP sequence.
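The power-of-two condition in rebuildSetCC encodes the bit trick named in the comment above: when C has a single bit set, ((x & C) >> log2(C)) already is the 0/1 value of (x & C) != 0. A standalone check over all byte values (plain C++; C = 8 is an arbitrary single-bit constant):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 8; // single bit set, log2(C) == 3
      for (uint32_t X = 0; X < 256; ++X) {
        uint32_t Shifted = (X & C) >> 3;
        uint32_t SetCC = (X & C) != 0 ? 1u : 0u;
        assert(Shifted == SetCC);
      }
      return 0;
    }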
-    SDValue Op0 = N1.getOperand(0);
-    SDValue Op1 = N1.getOperand(1);
+    SDValue Op0 = N.getOperand(0);
+    SDValue Op1 = N.getOperand(1);
 
-    if (Op0.getOpcode() == ISD::AND &&
-        Op1.getOpcode() == ISD::Constant) {
+    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
       SDValue AndOp1 = Op0.getOperand(1);
 
       if (AndOp1.getOpcode() == ISD::Constant) {
         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
 
         if (AndConst.isPowerOf2() &&
-            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+            cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
           SDLoc DL(N);
-          SDValue SetCC =
-            DAG.getSetCC(DL,
-                         getSetCCResultType(Op0.getValueType()),
-                         Op0, DAG.getConstant(0, DL, Op0.getValueType()),
-                         ISD::SETNE);
-
-          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
-                                          MVT::Other, Chain, SetCC, N2);
-          // Don't add the new BRCond into the worklist or else SimplifySelectCC
-          // will convert it back to (X & C1) >> C2.
-          CombineTo(N, NewBRCond, false);
-          // Truncate is dead.
-          if (Trunc)
-            deleteAndRecombine(Trunc);
-          // Replace the uses of SRL with SETCC
-          WorklistRemover DeadNodes(*this);
-          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
-          deleteAndRecombine(N1.getNode());
-          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+          return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
+                              Op0, DAG.getConstant(0, DL, Op0.getValueType()),
+                              ISD::SETNE);
         }
       }
     }
-
-    if (Trunc)
-      // Restore N1 if the above transformation doesn't match.
-      N1 = N->getOperand(1);
   }
 
   // Transform br(xor(x, y)) -> br(x != y)
   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
-  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
-    SDNode *TheXor = N1.getNode();
+  if (N.getOpcode() == ISD::XOR) {
+    // Because we may call this on a speculatively constructed
+    // SimplifiedSetCC node, we need to simplify this node first.
+    // Ideally this should be folded into SimplifySetCC and not
+    // here. For now, grab a handle to N so we don't lose it from
+    // replacements internal to the visit.
+    HandleSDNode XORHandle(N);
+    while (N.getOpcode() == ISD::XOR) {
+      SDValue Tmp = visitXOR(N.getNode());
+      // No simplification done.
+      if (!Tmp.getNode())
+        break;
+      // Returning N is a form of in-visit replacement that may have
+      // invalidated N. Grab the value from the handle.
+      if (Tmp.getNode() == N.getNode())
+        N = XORHandle.getValue();
+      else // Node simplified. Try simplifying again.
+        N = Tmp;
+    }
+
+    if (N.getOpcode() != ISD::XOR)
+      return N;
+
+    SDNode *TheXor = N.getNode();
+
     SDValue Op0 = TheXor->getOperand(0);
     SDValue Op1 = TheXor->getOperand(1);
-      if (SDValue Tmp = visitXOR(TheXor)) { -        if (Tmp.getNode() != TheXor) { -          DEBUG(dbgs() << "\nReplacing.8 "; -                TheXor->dump(&DAG); -                dbgs() << "\nWith: "; -                Tmp.getNode()->dump(&DAG); -                dbgs() << '\n'); -          WorklistRemover DeadNodes(*this); -          DAG.ReplaceAllUsesOfValueWith(N1, Tmp); -          deleteAndRecombine(TheXor); -          return DAG.getNode(ISD::BRCOND, SDLoc(N), -                             MVT::Other, Chain, Tmp, N2); -        } - -        // visitXOR has changed XOR's operands or replaced the XOR completely, -        // bail out. -        return SDValue(N, 0); -      } -    }      if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {        bool Equal = false; @@ -11223,19 +11963,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {          Equal = true;        } -      EVT SetCCVT = N1.getValueType(); +      EVT SetCCVT = N.getValueType();        if (LegalTypes)          SetCCVT = getSetCCResultType(SetCCVT); -      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), -                                   SetCCVT, -                                   Op0, Op1, -                                   Equal ? ISD::SETEQ : ISD::SETNE);        // Replace the uses of XOR with SETCC -      WorklistRemover DeadNodes(*this); -      DAG.ReplaceAllUsesOfValueWith(N1, SetCC); -      deleteAndRecombine(N1.getNode()); -      return DAG.getNode(ISD::BRCOND, SDLoc(N), -                         MVT::Other, Chain, SetCC, N2); +      return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, +                          Equal ? ISD::SETEQ : ISD::SETNE);      }    } @@ -11467,11 +12200,8 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {                                   BasePtr, Offset, AM);    ++PreIndexedNodes;    ++NodesCombined; -  DEBUG(dbgs() << "\nReplacing.4 "; -        N->dump(&DAG); -        dbgs() << "\nWith: "; -        Result.getNode()->dump(&DAG); -        dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: "; +             Result.getNode()->dump(&DAG); dbgs() << '\n');    WorklistRemover DeadNodes(*this);    if (isLoad) {      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); @@ -11636,11 +12366,9 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {                                  BasePtr, Offset, AM);          ++PostIndexedNodes;          ++NodesCombined; -        DEBUG(dbgs() << "\nReplacing.5 "; -              N->dump(&DAG); -              dbgs() << "\nWith: "; -              Result.getNode()->dump(&DAG); -              dbgs() << '\n'); +        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); +                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); +                   dbgs() << '\n');          WorklistRemover DeadNodes(*this);          if (isLoad) {            DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); @@ -11664,7 +12392,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {    return false;  } -/// \brief Return the base-pointer arithmetic from an indexed \p LD. +/// Return the base-pointer arithmetic from an indexed \p LD.  SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {    ISD::MemIndexedMode AM = LD->getAddressingMode();    assert(AM != ISD::UNINDEXED); @@ -11706,11 +12434,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {          // v3         = add v2, c          // Now we replace use of chain2 with chain1.  
This makes the second load          // isomorphic to the one we are deleting, and thus makes this load live. -        DEBUG(dbgs() << "\nReplacing.6 "; -              N->dump(&DAG); -              dbgs() << "\nWith chain: "; -              Chain.getNode()->dump(&DAG); -              dbgs() << "\n"); +        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG); +                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG); +                   dbgs() << "\n");          WorklistRemover DeadNodes(*this);          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);          AddUsersToWorklist(Chain.getNode()); @@ -11741,11 +12467,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {            AddUsersToWorklist(N);          } else            Index = DAG.getUNDEF(N->getValueType(1)); -        DEBUG(dbgs() << "\nReplacing.7 "; -              N->dump(&DAG); -              dbgs() << "\nWith: "; -              Undef.getNode()->dump(&DAG); -              dbgs() << " and 2 other values\n"); +        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); +                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); +                   dbgs() << " and 2 other values\n");          WorklistRemover DeadNodes(*this);          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); @@ -11773,13 +12497,14 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {    // Try to infer better alignment information than the load already has.    if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {      if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { -      if (Align > LD->getMemOperand()->getBaseAlignment()) { +      if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {          SDValue NewLoad = DAG.getExtLoad(              LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,              LD->getPointerInfo(), LD->getMemoryVT(), Align,              LD->getMemOperand()->getFlags(), LD->getAAInfo()); -        if (NewLoad.getNode() != N) -          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); +        // NewLoad will always be N as we are only refining the alignment +        assert(NewLoad.getNode() == N); +        (void)NewLoad;        }      }    } @@ -11826,7 +12551,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {  namespace { -/// \brief Helper structure used to slice a load in smaller loads. +/// Helper structure used to slice a load in smaller loads.  /// Basically a slice is obtained from the following sequence:  /// Origin = load Ty1, Base  /// Shift = srl Ty1 Origin, CstTy Amount @@ -11839,7 +12564,7 @@ namespace {  /// SliceTy is deduced from the number of bits that are actually used to  /// build Inst.  struct LoadedSlice { -  /// \brief Helper structure used to compute the cost of a slice. +  /// Helper structure used to compute the cost of a slice.    struct Cost {      /// Are we optimizing for code size.      bool ForCodeSize; @@ -11853,7 +12578,7 @@ struct LoadedSlice {      Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} -    /// \brief Get the cost of one isolated slice. +    /// Get the cost of one isolated slice.      Cost(const LoadedSlice &LS, bool ForCodeSize = false)          : ForCodeSize(ForCodeSize), Loads(1) {        EVT TruncType = LS.Inst->getValueType(0); @@ -11863,7 +12588,7 @@ struct LoadedSlice {          ZExts = 1;      } -    /// \brief Account for slicing gain in the current cost. +    /// Account for slicing gain in the current cost.      
    /// Slicing provides a few gains like removing a shift or a
    /// truncate. This method allows us to grow the cost of the original
    /// load with the gain from this slice.
@@ -11936,7 +12661,7 @@ struct LoadedSlice {
               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
 
-  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
+  /// Get the bits used in a chunk of bits \p BitWidth large.
   /// \return Result is \p BitWidth and has used bits set to 1 and
   ///         not used bits set to 0.
   APInt getUsedBits() const {
@@ -11956,14 +12681,14 @@ struct LoadedSlice {
     return UsedBits;
   }
 
-  /// \brief Get the size of the slice to be loaded in bytes.
+  /// Get the size of the slice to be loaded in bytes.
   unsigned getLoadedSize() const {
     unsigned SliceSize = getUsedBits().countPopulation();
     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
     return SliceSize / 8;
   }
 
-  /// \brief Get the type that will be loaded for this slice.
+  /// Get the type that will be loaded for this slice.
   /// Note: This may not be the final type for the slice.
   EVT getLoadedType() const {
     assert(DAG && "Missing context");
@@ -11971,7 +12696,7 @@ struct LoadedSlice {
     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
   }
 
-  /// \brief Get the alignment of the load used for this slice.
+  /// Get the alignment of the load used for this slice.
   unsigned getAlignment() const {
     unsigned Alignment = Origin->getAlignment();
     unsigned Offset = getOffsetFromBase();
@@ -11980,7 +12705,7 @@ struct LoadedSlice {
     return Alignment;
   }
 
-  /// \brief Check if this slice can be rewritten with legal operations.
+  /// Check if this slice can be rewritten with legal operations.
   bool isLegal() const {
     // An invalid slice is not legal.
     if (!Origin || !Inst || !DAG)
@@ -12024,7 +12749,7 @@ struct LoadedSlice {
     return true;
   }
 
-  /// \brief Get the offset in bytes of this slice in the original chunk of
+  /// Get the offset in bytes of this slice in the original chunk of
   /// bits.
   /// \pre DAG != nullptr.
   uint64_t getOffsetFromBase() const {
@@ -12045,7 +12770,7 @@ struct LoadedSlice {
     return Offset;
   }
 
-  /// \brief Generate the sequence of instructions to load the slice
+  /// Generate the sequence of instructions to load the slice
   /// represented by this object and redirect the uses of this slice to
   /// this new sequence of instructions.
   /// \pre this->Inst && this->Origin are valid Instructions and this
@@ -12083,7 +12808,7 @@ struct LoadedSlice {
     return LastInst;
   }
 
-  /// \brief Check if this slice can be merged with an expensive cross register
+  /// Check if this slice can be merged with an expensive cross register
   /// bank copy. E.g.,
   /// i = load i32
   /// f = bitcast i32 i to float
@@ -12132,7 +12857,7 @@ struct LoadedSlice {
 
 } // end anonymous namespace
 
-/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
+/// Check that all bits set in \p UsedBits form a dense region, i.e.,
 /// \p UsedBits looks like 0..0 1..1 0..0.
 static bool areUsedBitsDense(const APInt &UsedBits) {
   // If all the bits are one, this is dense!
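The "dense region" shape documented above (0..0 1..1 0..0) has a compact plain-integer analogue: strip the trailing zeros, then test for an all-ones value. A standalone sketch with uint32_t standing in for APInt (the helper name isDense is illustrative, not LLVM's):

    #include <cassert>
    #include <cstdint>

    // Dense: the set bits form one contiguous run. Strip trailing zeros,
    // then (Bits & (Bits + 1)) == 0 holds exactly for values 2^n - 1.
    static bool isDense(uint32_t Bits) {
      if (Bits == 0)
        return false;
      while ((Bits & 1) == 0)
        Bits >>= 1;
      return (Bits & (Bits + 1)) == 0;
    }

    int main() {
      assert(isDense(0x00000FF0));  // 0..0 1..1 0..0
      assert(isDense(0xFFFFFFFF));
      assert(!isDense(0x00000505)); // two separate runs
      return 0;
    }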
@@ -12148,7 +12873,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) {
   return NarrowedUsedBits.isAllOnesValue();
 }
 
-/// \brief Check whether or not \p First and \p Second are next to each other
+/// Check whether or not \p First and \p Second are next to each other
 /// in memory. This means that there is no hole between the bits loaded
 /// by \p First and the bits loaded by \p Second.
 static bool areSlicesNextToEachOther(const LoadedSlice &First,
@@ -12162,7 +12887,7 @@ static bool areSlicesNextToEachOther(const LoadedSlice &First,
   return areUsedBitsDense(UsedBits);
 }
 
-/// \brief Adjust the \p GlobalLSCost according to the target
+/// Adjust the \p GlobalLSCost according to the target
 /// pairing capabilities and the layout of the slices.
 /// \pre \p GlobalLSCost should account for at least as many loads as
 /// there is in the slices in \p LoadedSlices.
@@ -12175,8 +12900,8 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
 
   // Sort the slices so that elements that are likely to be next to each
   // other in memory are next to each other in the list.
-  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
-            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+  llvm::sort(LoadedSlices.begin(), LoadedSlices.end(),
+             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
   });
@@ -12223,7 +12948,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   }
 }
 
-/// \brief Check the profitability of all involved LoadedSlice.
+/// Check the profitability of all involved LoadedSlice.
 /// Currently, it is considered profitable if there are exactly two
 /// involved slices (1) which are (2) next to each other in memory, and
 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
@@ -12267,7 +12992,7 @@ static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   return OrigCost > GlobalSlicingCost;
}
 
-/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
+/// If the given load, \p LI, is used only by trunc or trunc(lshr)
 /// operations, split it in the various pieces being extracted.
 ///
 /// This sort of thing is introduced by SROA.
@@ -12386,22 +13111,6 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
 
-  // The store should be chained directly to the load or be an operand of a
-  // tokenfactor.
-  if (LD == Chain.getNode())
-    ; // ok.
-  else if (Chain->getOpcode() != ISD::TokenFactor)
-    return Result; // Fail.
-  else {
-    bool isOk = false;
-    for (const SDValue &ChainOp : Chain->op_values())
-      if (ChainOp.getNode() == LD) {
-        isOk = true;
-        break;
-      }
-    if (!isOk) return Result;
-  }
-
   // This only handles simple types.
   if (V.getValueType() != MVT::i16 &&
       V.getValueType() != MVT::i32 &&
@@ -12438,6 +13147,24 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
   // is aligned the same as the access width.
   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
 
+  // For narrowing to be valid, it must be the case that the load is the
+  // memory operation immediately preceding the store.
+  if (LD == Chain.getNode())
+    ; // ok.
+  else if (Chain->getOpcode() == ISD::TokenFactor && +           SDValue(LD, 1).hasOneUse()) { +    // LD has only 1 chain use so there are no indirect dependencies. +    bool isOk = false; +    for (const SDValue &ChainOp : Chain->op_values()) +      if (ChainOp.getNode() == LD) { +        isOk = true; +        break; +      } +    if (!isOk) +      return Result; +  } else +    return Result; // Fail. +    Result.first = MaskedBytes;    Result.second = NotMaskTZ/8;    return Result; @@ -12756,12 +13483,6 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,    return false;  } -static SDValue peekThroughBitcast(SDValue V) { -  while (V.getOpcode() == ISD::BITCAST) -    V = V.getOperand(0); -  return V; -} -  SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,                                           unsigned NumStores) {    SmallVector<SDValue, 8> Chains; @@ -12886,6 +13607,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(        StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);        SDValue Val = St->getValue(); +      Val = peekThroughBitcast(Val);        StoreInt <<= ElementSizeBits;        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {          StoreInt |= C->getAPIntValue() @@ -12918,13 +13640,13 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(                              FirstInChain->getPointerInfo(),                              FirstInChain->getAlignment());    } else { // Must be realized as a trunc store -    EVT LegalizedStoredValueTy = +    EVT LegalizedStoredValTy =          TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); -    unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits(); +    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();      ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);      SDValue ExtendedStoreVal =          DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL, -                        LegalizedStoredValueTy); +                        LegalizedStoredValTy);      NewStore = DAG.getTruncStore(          NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),          FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, @@ -12941,7 +13663,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(  }  void DAGCombiner::getStoreMergeCandidates( -    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) { +    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes, +    SDNode *&RootNode) {    // This holds the base pointer, index, and the offset in bytes from the base    // pointer.    BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); @@ -12970,6 +13693,12 @@ void DAGCombiner::getStoreMergeCandidates(      // Load and store should be the same type.      if (MemVT != LoadVT)        return; +    // Loads must only have one use. +    if (!Ld->hasNUsesOfValue(1, 0)) +      return; +    // The memory operands must not be volatile. +    if (Ld->isVolatile() || Ld->isIndexed()) +      return;    }    auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,                              int64_t &Offset) -> bool { @@ -12987,6 +13716,12 @@ void DAGCombiner::getStoreMergeCandidates(          auto LPtr = BaseIndexOffset::match(OtherLd, DAG);          if (LoadVT != OtherLd->getMemoryVT())            return false; +        // Loads must only have one use. +        if (!OtherLd->hasNUsesOfValue(1, 0)) +          return false; +        // The memory operands must not be volatile.
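// (Editorial note: these two requirements -- a single chain use, and neither
//  volatile nor indexed -- mirror the checks applied above to the load behind
//  the original store. A load whose chain result another node still depends
//  on, or a volatile/indexed load, cannot safely be folded into a wider
//  merged access.)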
+        if (OtherLd->isVolatile() || OtherLd->isIndexed()) +          return false;          if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))            return false;        } else @@ -13028,7 +13763,7 @@ void DAGCombiner::getStoreMergeCandidates(    // FIXME: We should be able to climb and    // descend TokenFactors to find candidates as well. -  SDNode *RootNode = (St->getChain()).getNode(); +  RootNode = St->getChain().getNode();    if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {      RootNode = Ldn->getChain().getNode(); @@ -13059,31 +13794,54 @@ void DAGCombiner::getStoreMergeCandidates(  // through the chain). Check in parallel by searching up from  // non-chain operands of candidates.  bool DAGCombiner::checkMergeStoreCandidatesForDependencies( -    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) { +    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, +    SDNode *RootNode) {    // FIXME: We should be able to truncate a full search of    // predecessors by doing a BFS and keeping tabs on the originating    // stores from which worklist nodes come, in a similar way to    // TokenFactor simplification. -  SmallPtrSet<const SDNode *, 16> Visited; +  SmallPtrSet<const SDNode *, 32> Visited;    SmallVector<const SDNode *, 8> Worklist; -  unsigned int Max = 8192; + +  // RootNode is a predecessor to all candidates so we need not search +  // past it. Add RootNode (peeking through TokenFactors). Do not count +  // these towards the size check. + +  Worklist.push_back(RootNode); +  while (!Worklist.empty()) { +    auto N = Worklist.pop_back_val(); +    if (N->getOpcode() == ISD::TokenFactor) { +      for (SDValue Op : N->ops()) +        Worklist.push_back(Op.getNode()); +    } +    Visited.insert(N); +  } + +  // Don't count pruning nodes towards max. +  unsigned int Max = 1024 + Visited.size();    // Search Ops of store candidates.    for (unsigned i = 0; i < NumStores; ++i) { -    SDNode *n = StoreNodes[i].MemNode; -    // Potential loops may happen only through non-chain operands -    for (unsigned j = 1; j < n->getNumOperands(); ++j) -      Worklist.push_back(n->getOperand(j).getNode()); +    SDNode *N = StoreNodes[i].MemNode; +    // Of the 4 Store Operands: +    //   * Chain (Op 0) -> We have already considered these +    //                    in candidate selection and they can be +    //                    safely ignored +    //   * Value (Op 1) -> Cycles may happen (e.g. through load chains) +    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant +    //                      and so no cycles are possible. +    //   * (Op 3) -> appears to always be undef. Cannot be source of cycle. +    // +    // Thus we need only check predecessors of the value operands. +    auto *Op = N->getOperand(1).getNode(); +    if (Visited.insert(Op).second) +      Worklist.push_back(Op);    }    // Search through DAG. We can stop early if we find a store node. -  for (unsigned i = 0; i < NumStores; ++i) { +  for (unsigned i = 0; i < NumStores; ++i)      if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,                                      Max))        return false; -    // Check if we ended early, failing conservatively if so.
-    if (Visited.size() >= Max) -      return false; -  }    return true;  } @@ -13121,8 +13879,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {      return false;    SmallVector<MemOpLink, 8> StoreNodes; +  SDNode *RootNode;    // Find potential store merge candidates by searching through chain sub-DAG -  getStoreMergeCandidates(St, StoreNodes); +  getStoreMergeCandidates(St, StoreNodes, RootNode);    // Check if there is anything to merge.    if (StoreNodes.size() < 2) @@ -13130,10 +13889,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {    // Sort the memory operands according to their distance from the    // base pointer. -  std::sort(StoreNodes.begin(), StoreNodes.end(), -            [](MemOpLink LHS, MemOpLink RHS) { -              return LHS.OffsetFromBase < RHS.OffsetFromBase; -            }); +  llvm::sort(StoreNodes.begin(), StoreNodes.end(), +             [](MemOpLink LHS, MemOpLink RHS) { +               return LHS.OffsetFromBase < RHS.OffsetFromBase; +             });    // Store Merge attempts to merge the lowest stores. This generally    // works out as if successful, as the remaining stores are checked @@ -13177,178 +13936,191 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {        continue;      } -    // Check that we can merge these candidates without causing a cycle -    if (!checkMergeStoreCandidatesForDependencies(StoreNodes, -                                                  NumConsecutiveStores)) { -      StoreNodes.erase(StoreNodes.begin(), -                       StoreNodes.begin() + NumConsecutiveStores); -      continue; -    } -      // The node with the lowest store address.      LLVMContext &Context = *DAG.getContext();      const DataLayout &DL = DAG.getDataLayout();      // Store the constants into memory as one consecutive store.      
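// (Editorial sketch with illustrative values, not from the source: merging
//  two consecutive i16 constant stores of 0x1111 and 0x2222 builds a single
//  i32 by the shift-and-or accumulation shown in
//  MergeStoresOfConstantsOrVecElts,
//      StoreInt = (StoreInt << 16) | NextConst;
//  visiting elements so that, on a little-endian target, the
//  lowest-addressed element lands in the low bits: the merged value is
//  0x22221111.)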
if (IsConstantSrc) { -      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; -      unsigned FirstStoreAS = FirstInChain->getAddressSpace(); -      unsigned FirstStoreAlign = FirstInChain->getAlignment(); -      unsigned LastLegalType = 1; -      unsigned LastLegalVectorType = 1; -      bool LastIntegerTrunc = false; -      bool NonZero = false; -      unsigned FirstZeroAfterNonZero = NumConsecutiveStores; -      for (unsigned i = 0; i < NumConsecutiveStores; ++i) { -        StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); -        SDValue StoredVal = ST->getValue(); -        bool IsElementZero = false; -        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) -          IsElementZero = C->isNullValue(); -        else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) -          IsElementZero = C->getConstantFPValue()->isNullValue(); -        if (IsElementZero) { -          if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) -            FirstZeroAfterNonZero = i; -        } -        NonZero |= !IsElementZero; +      while (NumConsecutiveStores >= 2) { +        LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; +        unsigned FirstStoreAS = FirstInChain->getAddressSpace(); +        unsigned FirstStoreAlign = FirstInChain->getAlignment(); +        unsigned LastLegalType = 1; +        unsigned LastLegalVectorType = 1; +        bool LastIntegerTrunc = false; +        bool NonZero = false; +        unsigned FirstZeroAfterNonZero = NumConsecutiveStores; +        for (unsigned i = 0; i < NumConsecutiveStores; ++i) { +          StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); +          SDValue StoredVal = ST->getValue(); +          bool IsElementZero = false; +          if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) +            IsElementZero = C->isNullValue(); +          else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) +            IsElementZero = C->getConstantFPValue()->isNullValue(); +          if (IsElementZero) { +            if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) +              FirstZeroAfterNonZero = i; +          } +          NonZero |= !IsElementZero; -        // Find a legal type for the constant store. -        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; -        EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); -        bool IsFast = false; -        if (TLI.isTypeLegal(StoreTy) && -            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && -            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, -                                   FirstStoreAlign, &IsFast) && -            IsFast) { -          LastIntegerTrunc = false; -          LastLegalType = i + 1; -          // Or check whether a truncstore is legal. -        } else if (TLI.getTypeAction(Context, StoreTy) == -                   TargetLowering::TypePromoteInteger) { -          EVT LegalizedStoredValueTy = -              TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); -          if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && -              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && +          // Find a legal type for the constant store. +          unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; +          EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); +          bool IsFast = false; + +          // Break early when size is too large to be legal. 
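// (Editorial note: MaximumLegalStoreInBits is a precomputed bound on the
//  widest store width any legal type can provide; the breaks added below use
//  it to stop the scan from building ever-wider candidate EVTs that no
//  target store could match anyway.)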
+          if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) +            break; + +          if (TLI.isTypeLegal(StoreTy) && +              TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&                TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,                                       FirstStoreAlign, &IsFast) &&                IsFast) { -            LastIntegerTrunc = true; +            LastIntegerTrunc = false;              LastLegalType = i + 1; +            // Or check whether a truncstore is legal. +          } else if (TLI.getTypeAction(Context, StoreTy) == +                     TargetLowering::TypePromoteInteger) { +            EVT LegalizedStoredValTy = +                TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); +            if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && +                TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && +                TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, +                                       FirstStoreAlign, &IsFast) && +                IsFast) { +              LastIntegerTrunc = true; +              LastLegalType = i + 1; +            }            } -        } -        // We only use vectors if the constant is known to be zero or the target -        // allows it and the function is not marked with the noimplicitfloat -        // attribute. -        if ((!NonZero || -             TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && -            !NoVectors) { -          // Find a legal type for the vector store. -          unsigned Elts = (i + 1) * NumMemElts; -          EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); -          if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && -              TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && -              TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, -                                     FirstStoreAlign, &IsFast) && -              IsFast) -            LastLegalVectorType = i + 1; +          // We only use vectors if the constant is known to be zero or the +          // target allows it and the function is not marked with the +          // noimplicitfloat attribute. +          if ((!NonZero || +               TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && +              !NoVectors) { +            // Find a legal type for the vector store. +            unsigned Elts = (i + 1) * NumMemElts; +            EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); +            if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && +                TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && +                TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, +                                       FirstStoreAlign, &IsFast) && +                IsFast) +              LastLegalVectorType = i + 1; +          }          } -      } -      bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; -      unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; +        bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; +        unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + +        // Check if we found a legal integer type that creates a meaningful +        // merge. +        if (NumElem < 2) { +          // We know that candidate stores are in order and of correct +          // shape. 
While there is no mergeable sequence from the +          // beginning one may start later in the sequence. The only +          // reason a merge of size N could have failed where another of +          // the same size would not have, is if the alignment has +          // improved or we've dropped a non-zero value. Drop as many +          // candidates as we can here. +          unsigned NumSkip = 1; +          while ( +              (NumSkip < NumConsecutiveStores) && +              (NumSkip < FirstZeroAfterNonZero) && +              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) +            NumSkip++; + +          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); +          NumConsecutiveStores -= NumSkip; +          continue; +        } -      // Check if we found a legal integer type that creates a meaningful merge. -      if (NumElem < 2) { -        // We know that candidate stores are in order and of correct -        // shape. While there is no mergeable sequence from the -        // beginning one may start later in the sequence. The only -        // reason a merge of size N could have failed where another of -        // the same size would not have, is if the alignment has -        // improved or we've dropped a non-zero value. Drop as many -        // candidates as we can here. -        unsigned NumSkip = 1; -        while ( -            (NumSkip < NumConsecutiveStores) && -            (NumSkip < FirstZeroAfterNonZero) && -            (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) { -          NumSkip++; +        // Check that we can merge these candidates without causing a cycle. +        if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, +                                                      RootNode)) { +          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); +          NumConsecutiveStores -= NumElem; +          continue;          } -        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); -        continue; -      } -      bool Merged = MergeStoresOfConstantsOrVecElts( -          StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); -      RV |= Merged; +        RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true, +                                              UseVector, LastIntegerTrunc); -      // Remove merged stores for next iteration. -      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); +        // Remove merged stores for next iteration. +        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); +        NumConsecutiveStores -= NumElem; +      }        continue;      }      // When extracting multiple vector elements, try to store them      // in one vector store rather than a sequence of scalar stores.      if (IsExtractVecSrc) { -      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; -      unsigned FirstStoreAS = FirstInChain->getAddressSpace(); -      unsigned FirstStoreAlign = FirstInChain->getAlignment(); -      unsigned NumStoresToMerge = 1; -      for (unsigned i = 0; i < NumConsecutiveStores; ++i) { -        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); -        SDValue StVal = peekThroughBitcast(St->getValue()); -        // This restriction could be loosened. -        // Bail out if any stored values are not elements extracted from a -        // vector. 
It should be possible to handle mixed sources, but load -        // sources need more careful handling (see the block of code below that -        // handles consecutive loads). -        if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT && -            StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR) -          return RV; +      // Loop on Consecutive Stores on success. +      while (NumConsecutiveStores >= 2) { +        LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; +        unsigned FirstStoreAS = FirstInChain->getAddressSpace(); +        unsigned FirstStoreAlign = FirstInChain->getAlignment(); +        unsigned NumStoresToMerge = 1; +        for (unsigned i = 0; i < NumConsecutiveStores; ++i) { +          // Find a legal type for the vector store. +          unsigned Elts = (i + 1) * NumMemElts; +          EVT Ty = +              EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); +          bool IsFast; -        // Find a legal type for the vector store. -        unsigned Elts = (i + 1) * NumMemElts; -        EVT Ty = -            EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); -        bool IsFast; -        if (TLI.isTypeLegal(Ty) && -            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && -            TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, -                                   FirstStoreAlign, &IsFast) && -            IsFast) -          NumStoresToMerge = i + 1; -      } +          // Break early when size is too large to be legal. +          if (Ty.getSizeInBits() > MaximumLegalStoreInBits) +            break; -      // Check if we found a legal integer type that creates a meaningful merge. -      if (NumStoresToMerge < 2) { -        // We know that candidate stores are in order and of correct -        // shape. While there is no mergeable sequence from the -        // beginning one may start later in the sequence. The only -        // reason a merge of size N could have failed where another of -        // the same size would not have, is if the alignment has -        // improved. Drop as many candidates as we can here. -        unsigned NumSkip = 1; -        while ((NumSkip < NumConsecutiveStores) && -               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) -          NumSkip++; +          if (TLI.isTypeLegal(Ty) && +              TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && +              TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, +                                     FirstStoreAlign, &IsFast) && +              IsFast) +            NumStoresToMerge = i + 1; +        } -        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); -        continue; -      } +        // Check if we found a legal integer type creating a meaningful +        // merge. +        if (NumStoresToMerge < 2) { +          // We know that candidate stores are in order and of correct +          // shape. While there is no mergeable sequence from the +          // beginning one may start later in the sequence. The only +          // reason a merge of size N could have failed where another of +          // the same size would not have, is if the alignment has +          // improved. Drop as many candidates as we can here. 
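// (Editorial note: a candidate is only skipped while its alignment does not
//  exceed FirstStoreAlign; the first better-aligned store could make a wide
//  store legal and fast where this attempt failed, so the outer loop retries
//  from there instead of discarding it.)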
+          unsigned NumSkip = 1; +          while ( +              (NumSkip < NumConsecutiveStores) && +              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) +            NumSkip++; + +          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); +          NumConsecutiveStores -= NumSkip; +          continue; +        } + +        // Check that we can merge these candidates without causing a cycle. +        if (!checkMergeStoreCandidatesForDependencies( +                StoreNodes, NumStoresToMerge, RootNode)) { +          StoreNodes.erase(StoreNodes.begin(), +                           StoreNodes.begin() + NumStoresToMerge); +          NumConsecutiveStores -= NumStoresToMerge; +          continue; +        } + +        RV |= MergeStoresOfConstantsOrVecElts( +            StoreNodes, MemVT, NumStoresToMerge, false, true, false); -      bool Merged = MergeStoresOfConstantsOrVecElts( -          StoreNodes, MemVT, NumStoresToMerge, false, true, false); -      if (!Merged) {          StoreNodes.erase(StoreNodes.begin(),                           StoreNodes.begin() + NumStoresToMerge); -        continue; +        NumConsecutiveStores -= NumStoresToMerge;        } -      // Remove merged stores for next iteration. -      StoreNodes.erase(StoreNodes.begin(), -                       StoreNodes.begin() + NumStoresToMerge); -      RV = true;        continue;      } @@ -13362,24 +14134,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {      // Find acceptable loads. Loads need to have the same chain (token factor),      // must not be zext, volatile, indexed, and they must be consecutive.      BaseIndexOffset LdBasePtr; +      for (unsigned i = 0; i < NumConsecutiveStores; ++i) {        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);        SDValue Val = peekThroughBitcast(St->getValue()); -      LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val); -      if (!Ld) -        break; - -      // Loads must only have one use. -      if (!Ld->hasNUsesOfValue(1, 0)) -        break; - -      // The memory operands must not be volatile. -      if (Ld->isVolatile() || Ld->isIndexed()) -        break; - -      // The stored memory type must be the same. -      if (Ld->getMemoryVT() != MemVT) -        break; +      LoadSDNode *Ld = cast<LoadSDNode>(Val);        BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);        // If this is not the first ptr that we check. @@ -13397,90 +14156,75 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {        LoadNodes.push_back(MemOpLink(Ld, LdOffset));      } -    if (LoadNodes.size() < 2) { -      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); -      continue; -    } +    while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { +      // If we have load/store pair instructions and we only have two values, +      // don't bother merging. 
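// (Editorial note: TLI.hasPairedLoad reports whether the target can issue
//  the two loads as one paired instruction -- e.g. AArch64's ldp -- in which
//  case a two-element merge would save nothing.)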
+      unsigned RequiredAlignment; +      if (LoadNodes.size() == 2 && +          TLI.hasPairedLoad(MemVT, RequiredAlignment) && +          StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { +        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); +        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); +        break; +      } +      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; +      unsigned FirstStoreAS = FirstInChain->getAddressSpace(); +      unsigned FirstStoreAlign = FirstInChain->getAlignment(); +      LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); +      unsigned FirstLoadAS = FirstLoad->getAddressSpace(); +      unsigned FirstLoadAlign = FirstLoad->getAlignment(); -    // If we have load/store pair instructions and we only have two values, -    // don't bother merging. -    unsigned RequiredAlignment; -    if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && -        StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { -      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); -      continue; -    } -    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; -    unsigned FirstStoreAS = FirstInChain->getAddressSpace(); -    unsigned FirstStoreAlign = FirstInChain->getAlignment(); -    LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); -    unsigned FirstLoadAS = FirstLoad->getAddressSpace(); -    unsigned FirstLoadAlign = FirstLoad->getAlignment(); +      // Scan the memory operations on the chain and find the first +      // non-consecutive load memory address. These variables hold the index in +      // the store node array. -    // Scan the memory operations on the chain and find the first -    // non-consecutive load memory address. These variables hold the index in -    // the store node array. -    unsigned LastConsecutiveLoad = 1; -    // This variable refers to the size and not index in the array. -    unsigned LastLegalVectorType = 1; -    unsigned LastLegalIntegerType = 1; -    bool isDereferenceable = true; -    bool DoIntegerTruncate = false; -    StartAddress = LoadNodes[0].OffsetFromBase; -    SDValue FirstChain = FirstLoad->getChain(); -    for (unsigned i = 1; i < LoadNodes.size(); ++i) { -      // All loads must share the same chain. -      if (LoadNodes[i].MemNode->getChain() != FirstChain) -        break; +      unsigned LastConsecutiveLoad = 1; -      int64_t CurrAddress = LoadNodes[i].OffsetFromBase; -      if (CurrAddress - StartAddress != (ElementSizeBytes * i)) -        break; -      LastConsecutiveLoad = i; - -      if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) -        isDereferenceable = false; - -      // Find a legal type for the vector store. -      unsigned Elts = (i + 1) * NumMemElts; -      EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - -      bool IsFastSt, IsFastLd; -      if (TLI.isTypeLegal(StoreTy) && -          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && -          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, -                                 FirstStoreAlign, &IsFastSt) && -          IsFastSt && -          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, -                                 FirstLoadAlign, &IsFastLd) && -          IsFastLd) { -        LastLegalVectorType = i + 1; -      } +      // This variable refers to the size and not index in the array. 
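// (Editorial example: LastConsecutiveLoad is an index -- a value of 2 means
//  loads 0, 1 and 2 are consecutive -- while the LastLegal* variables below
//  are counts -- a value of 3 means the first three memops already form a
//  legal merged type.)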
+      unsigned LastLegalVectorType = 1; +      unsigned LastLegalIntegerType = 1; +      bool isDereferenceable = true; +      bool DoIntegerTruncate = false; +      StartAddress = LoadNodes[0].OffsetFromBase; +      SDValue FirstChain = FirstLoad->getChain(); +      for (unsigned i = 1; i < LoadNodes.size(); ++i) { +        // All loads must share the same chain. +        if (LoadNodes[i].MemNode->getChain() != FirstChain) +          break; + +        int64_t CurrAddress = LoadNodes[i].OffsetFromBase; +        if (CurrAddress - StartAddress != (ElementSizeBytes * i)) +          break; +        LastConsecutiveLoad = i; + +        if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) +          isDereferenceable = false; + +        // Find a legal type for the vector store. +        unsigned Elts = (i + 1) * NumMemElts; +        EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + +        // Break early when size is too large to be legal. +        if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) +          break; + +        bool IsFastSt, IsFastLd; +        if (TLI.isTypeLegal(StoreTy) && +            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && +            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, +                                   FirstStoreAlign, &IsFastSt) && +            IsFastSt && +            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, +                                   FirstLoadAlign, &IsFastLd) && +            IsFastLd) { +          LastLegalVectorType = i + 1; +        } -      // Find a legal type for the integer store. -      unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; -      StoreTy = EVT::getIntegerVT(Context, SizeInBits); -      if (TLI.isTypeLegal(StoreTy) && -          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && -          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, -                                 FirstStoreAlign, &IsFastSt) && -          IsFastSt && -          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, -                                 FirstLoadAlign, &IsFastLd) && -          IsFastLd) { -        LastLegalIntegerType = i + 1; -        DoIntegerTruncate = false; -        // Or check whether a truncstore and extload is legal. -      } else if (TLI.getTypeAction(Context, StoreTy) == -                 TargetLowering::TypePromoteInteger) { -        EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); -        if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && -            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && -            TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, -                               StoreTy) && -            TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, -                               StoreTy) && -            TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && +        // Find a legal type for the integer store. 
+        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; +        StoreTy = EVT::getIntegerVT(Context, SizeInBits); +        if (TLI.isTypeLegal(StoreTy) && +            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,                                     FirstStoreAlign, &IsFastSt) &&              IsFastSt && @@ -13488,105 +14232,140 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {                                     FirstLoadAlign, &IsFastLd) &&              IsFastLd) {            LastLegalIntegerType = i + 1; -          DoIntegerTruncate = true; +          DoIntegerTruncate = false; +          // Or check whether a truncstore and extload is legal. +        } else if (TLI.getTypeAction(Context, StoreTy) == +                   TargetLowering::TypePromoteInteger) { +          EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); +          if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && +              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && +              TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, +                                 StoreTy) && +              TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, +                                 StoreTy) && +              TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && +              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, +                                     FirstStoreAlign, &IsFastSt) && +              IsFastSt && +              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, +                                     FirstLoadAlign, &IsFastLd) && +              IsFastLd) { +            LastLegalIntegerType = i + 1; +            DoIntegerTruncate = true; +          }          }        } -    } -    // Only use vector types if the vector type is larger than the integer type. -    // If they are the same, use integers. -    bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; -    unsigned LastLegalType = -        std::max(LastLegalVectorType, LastLegalIntegerType); - -    // We add +1 here because the LastXXX variables refer to location while -    // the NumElem refers to array/index size. -    unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); -    NumElem = std::min(LastLegalType, NumElem); - -    if (NumElem < 2) { -      // We know that candidate stores are in order and of correct -      // shape. While there is no mergeable sequence from the -      // beginning one may start later in the sequence. The only -      // reason a merge of size N could have failed where another of -      // the same size would not have is if the alignment or either -      // the load or store has improved. Drop as many candidates as we -      // can here. -      unsigned NumSkip = 1; -      while ((NumSkip < LoadNodes.size()) && -             (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && -             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) -        NumSkip++; -      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); -      continue; -    } +      // Only use vector types if the vector type is larger than the integer +      // type. If they are the same, use integers. 
+      bool UseVectorTy = +          LastLegalVectorType > LastLegalIntegerType && !NoVectors; +      unsigned LastLegalType = +          std::max(LastLegalVectorType, LastLegalIntegerType); -    // Find if it is better to use vectors or integers to load and store -    // to memory. -    EVT JointMemOpVT; -    if (UseVectorTy) { -      // Find a legal type for the vector store. -      unsigned Elts = NumElem * NumMemElts; -      JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); -    } else { -      unsigned SizeInBits = NumElem * ElementSizeBytes * 8; -      JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); -    } - -    SDLoc LoadDL(LoadNodes[0].MemNode); -    SDLoc StoreDL(StoreNodes[0].MemNode); - -    // The merged loads are required to have the same incoming chain, so -    // using the first's chain is acceptable. - -    SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); -    AddToWorklist(NewStoreChain.getNode()); - -    MachineMemOperand::Flags MMOFlags = isDereferenceable ? -                                          MachineMemOperand::MODereferenceable: -                                          MachineMemOperand::MONone; - -    SDValue NewLoad, NewStore; -    if (UseVectorTy || !DoIntegerTruncate) { -      NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), -                            FirstLoad->getBasePtr(), -                            FirstLoad->getPointerInfo(), FirstLoadAlign, -                            MMOFlags); -      NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad, -                              FirstInChain->getBasePtr(), -                              FirstInChain->getPointerInfo(), FirstStoreAlign); -    } else { // This must be the truncstore/extload case -      EVT ExtendedTy = -          TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); -      NewLoad = -          DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(), -                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), -                         JointMemOpVT, FirstLoadAlign, MMOFlags); -      NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, -                                   FirstInChain->getBasePtr(), -                                   FirstInChain->getPointerInfo(), JointMemOpVT, -                                   FirstInChain->getAlignment(), -                                   FirstInChain->getMemOperand()->getFlags()); -    } - -    // Transfer chain users from old loads to the new load. -    for (unsigned i = 0; i < NumElem; ++i) { -      LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); -      DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), -                                    SDValue(NewLoad.getNode(), 1)); -    } - -    // Replace the all stores with the new store. Recursively remove -    // corresponding value if its no longer used. -    for (unsigned i = 0; i < NumElem; ++i) { -      SDValue Val = StoreNodes[i].MemNode->getOperand(1); -      CombineTo(StoreNodes[i].MemNode, NewStore); -      if (Val.getNode()->use_empty()) -        recursivelyDeleteUnusedNodes(Val.getNode()); -    } - -    RV = true; -    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); +      // We add +1 here because the LastXXX variables refer to location while +      // the NumElem refers to array/index size. 
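// (Editorial note: e.g. LastConsecutiveLoad == 2 permits NumElem up to 3.)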
+      unsigned NumElem = +          std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); +      NumElem = std::min(LastLegalType, NumElem); + +      if (NumElem < 2) { +        // We know that candidate stores are in order and of correct +        // shape. While there is no mergeable sequence from the +        // beginning one may start later in the sequence. The only +        // reason a merge of size N could have failed where another of +        // the same size would not have is if the alignment or either +        // the load or store has improved. Drop as many candidates as we +        // can here. +        unsigned NumSkip = 1; +        while ((NumSkip < LoadNodes.size()) && +               (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && +               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) +          NumSkip++; +        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); +        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); +        NumConsecutiveStores -= NumSkip; +        continue; +      } + +      // Check that we can merge these candidates without causing a cycle. +      if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, +                                                    RootNode)) { +        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); +        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); +        NumConsecutiveStores -= NumElem; +        continue; +      } + +      // Find if it is better to use vectors or integers to load and store +      // to memory. +      EVT JointMemOpVT; +      if (UseVectorTy) { +        // Find a legal type for the vector store. +        unsigned Elts = NumElem * NumMemElts; +        JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); +      } else { +        unsigned SizeInBits = NumElem * ElementSizeBytes * 8; +        JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); +      } + +      SDLoc LoadDL(LoadNodes[0].MemNode); +      SDLoc StoreDL(StoreNodes[0].MemNode); + +      // The merged loads are required to have the same incoming chain, so +      // using the first's chain is acceptable. + +      SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); +      AddToWorklist(NewStoreChain.getNode()); + +      MachineMemOperand::Flags MMOFlags = +          isDereferenceable ? 
MachineMemOperand::MODereferenceable +                            : MachineMemOperand::MONone; + +      SDValue NewLoad, NewStore; +      if (UseVectorTy || !DoIntegerTruncate) { +        NewLoad = +            DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), +                        FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), +                        FirstLoadAlign, MMOFlags); +        NewStore = DAG.getStore( +            NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), +            FirstInChain->getPointerInfo(), FirstStoreAlign); +      } else { // This must be the truncstore/extload case +        EVT ExtendedTy = +            TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); +        NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, +                                 FirstLoad->getChain(), FirstLoad->getBasePtr(), +                                 FirstLoad->getPointerInfo(), JointMemOpVT, +                                 FirstLoadAlign, MMOFlags); +        NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, +                                     FirstInChain->getBasePtr(), +                                     FirstInChain->getPointerInfo(), +                                     JointMemOpVT, FirstInChain->getAlignment(), +                                     FirstInChain->getMemOperand()->getFlags()); +      } + +      // Transfer chain users from old loads to the new load. +      for (unsigned i = 0; i < NumElem; ++i) { +        LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); +        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), +                                      SDValue(NewLoad.getNode(), 1)); +      } + +      // Replace all the stores with the new store. Recursively remove the +      // corresponding value if it's no longer used. +      for (unsigned i = 0; i < NumElem; ++i) { +        SDValue Val = StoreNodes[i].MemNode->getOperand(1); +        CombineTo(StoreNodes[i].MemNode, NewStore); +        if (Val.getNode()->use_empty()) +          recursivelyDeleteUnusedNodes(Val.getNode()); +      } + +      RV = true; +      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); +      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); +      NumConsecutiveStores -= NumElem; +    }    }    return RV;  } @@ -13728,13 +14507,14 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {    // Try to infer better alignment information than the store already has.
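// (Editorial note on the hunk below: the inferred alignment is now applied
//  only when the pointer's offset is still a multiple of it, and since the
//  rebuilt store differs from N solely in alignment, getTruncStore CSEs back
//  to N itself -- which the new assert documents.)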
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {      if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { -      if (Align > ST->getAlignment()) { +      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {          SDValue NewStore =              DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),                                ST->getMemoryVT(), Align,                                ST->getMemOperand()->getFlags(), ST->getAAInfo()); -        if (NewStore.getNode() != N) -          return CombineTo(ST, NewStore, true); +        // NewStore will always be N as we are only refining the alignment +        assert(NewStore.getNode() == N); +        (void)NewStore;        }      }    } @@ -14216,6 +14996,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {    SDValue EltNo = N->getOperand(1);    ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); +  // extract_vector_elt of out-of-bounds element -> UNDEF +  if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements())) +    return DAG.getUNDEF(NVT); +    // extract_vector_elt (build_vector x, y), 1 -> y    if (ConstEltNo &&        InVec.getOpcode() == ISD::BUILD_VECTOR && @@ -14301,6 +15085,23 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {      }    } +  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can +  // simplify it based on the (valid) extraction indices. +  if (llvm::all_of(InVec->uses(), [&](SDNode *Use) { +        return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT && +               Use->getOperand(0) == InVec && +               isa<ConstantSDNode>(Use->getOperand(1)); +      })) { +    APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements()); +    for (SDNode *Use : InVec->uses()) { +      auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1)); +      if (CstElt->getAPIntValue().ult(VT.getVectorNumElements())) +        DemandedElts.setBit(CstElt->getZExtValue()); +    } +    if (SimplifyDemandedVectorElts(InVec, DemandedElts, true)) +      return SDValue(N, 0); +  } +    bool BCNumEltsChanged = false;    EVT ExtVT = VT.getVectorElementType();    EVT LVT = ExtVT; @@ -14507,7 +15308,10 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {    assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&           "Invalid vector size");    // Check if the new vector type is legal. -  if (!isTypeLegal(VecVT)) return SDValue(); +  if (!isTypeLegal(VecVT) || +      (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) && +       TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))) +    return SDValue();    // Make the new BUILD_VECTOR.    SDValue BV = DAG.getBuildVector(VecVT, DL, Ops); @@ -14754,12 +15558,16 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {      }      // Not an undef or zero. If the input is something other than an -    // EXTRACT_VECTOR_ELT with a constant index, bail out. +    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.      if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||          !isa<ConstantSDNode>(Op.getOperand(1)))        return SDValue();      SDValue ExtractedFromVec = Op.getOperand(0); +    APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue(); +    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements())) +      return SDValue(); +      // All inputs must have the same element type as the output.      
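// (Editorial note: the in-range check added just above pairs with the new
//  rule at the top of visitEXTRACT_VECTOR_ELT -- an out-of-bounds extract now
//  folds to UNDEF -- so such an operand cannot be treated as a real shuffle
//  input here.)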
if (VT.getVectorElementType() !=          ExtractedFromVec.getValueType().getVectorElementType()) @@ -14915,6 +15723,54 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {    return Shuffles[0];  } +// Try to turn a build vector of zero extends of extract vector elts into +// a vector zero extend and possibly an extract subvector. +// TODO: Support sign extend or any extend? +// TODO: Allow undef elements? +// TODO: Don't require the extracts to start at element 0. +SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { +  if (LegalOperations) +    return SDValue(); + +  EVT VT = N->getValueType(0); + +  SDValue Op0 = N->getOperand(0); +  auto checkElem = [&](SDValue Op) -> int64_t { +    if (Op.getOpcode() == ISD::ZERO_EXTEND && +        Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && +        Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0)) +      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1))) +        return C->getZExtValue(); +    return -1; +  }; + +  // Make sure the first element matches +  // (zext (extract_vector_elt X, C)) +  int64_t Offset = checkElem(Op0); +  if (Offset < 0) +    return SDValue(); + +  unsigned NumElems = N->getNumOperands(); +  SDValue In = Op0.getOperand(0).getOperand(0); +  EVT InSVT = In.getValueType().getScalarType(); +  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems); + +  // Don't create an illegal input type after type legalization. +  if (LegalTypes && !TLI.isTypeLegal(InVT)) +    return SDValue(); + +  // Ensure all the elements come from the same vector and are adjacent. +  for (unsigned i = 1; i != NumElems; ++i) { +    if ((Offset + i) != checkElem(N->getOperand(i))) +      return SDValue(); +  } + +  SDLoc DL(N); +  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In, +                   Op0.getOperand(0).getOperand(1)); +  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In); +} +  SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {    EVT VT = N->getValueType(0); @@ -14922,6 +15778,32 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {    if (ISD::allOperandsUndef(N))      return DAG.getUNDEF(VT); +  // If this is a splat of a bitcast from another vector, change to a +  // concat_vector. +  // For example: +  //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) -> +  //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X)))) +  // +  // If X is a build_vector itself, the concat can become a larger build_vector. +  // TODO: Maybe this is useful for non-splat too? +  if (!LegalOperations) { +    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) { +      Splat = peekThroughBitcast(Splat); +      EVT SrcVT = Splat.getValueType(); +      if (SrcVT.isVector()) { +        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements(); +        EVT NewVT = EVT::getVectorVT(*DAG.getContext(), +                                     SrcVT.getVectorElementType(), NumElts); +        if (!LegalTypes || TLI.isTypeLegal(NewVT)) { +          SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat); +          SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), +                                       NewVT, Ops); +          return DAG.getBitcast(VT, Concat); +        } +      } +    } +  } +    // Check if we can express BUILD VECTOR via subvector extract.
if (!LegalTypes && (N->getNumOperands() > 1)) {      SDValue Op0 = N->getOperand(0); @@ -14951,6 +15833,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {                           Op0.getOperand(0), Op0.getOperand(1));    } +  if (SDValue V = convertBuildVecZextToZext(N)) +    return V; +    if (SDValue V = reduceBuildVecExtToExtBuildVec(N))      return V; @@ -15140,6 +16025,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {        if (!SclTy.isFloatingPoint() && !SclTy.isInteger())          return SDValue(); +      // Bail out if the vector size is not a multiple of the scalar size. +      if (VT.getSizeInBits() % SclTy.getSizeInBits()) +        return SDValue(); +        unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();        if (VNTNumElms < 2)          return SDValue(); @@ -15418,13 +16307,22 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {        // Only do this if we won't split any elements.        if (ExtractSize % EltSize == 0) {          unsigned NumElems = ExtractSize / EltSize; -        EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(), -                                         InVT.getVectorElementType(), NumElems); -        if ((!LegalOperations || -             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) && +        EVT EltVT = InVT.getVectorElementType(); +        EVT ExtractVT = NumElems == 1 ? EltVT : +          EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems); +        if ((Level < AfterLegalizeDAG || +             (NumElems == 1 || +              TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&              (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {            unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /                              EltSize; +          if (NumElems == 1) { +            SDValue Src = V->getOperand(IdxVal); +            if (EltVT != Src.getValueType()) +              Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src); + +            return DAG.getBitcast(NVT, Src); +          }            // Extract the pieces from the original build_vector.            SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), @@ -15466,122 +16364,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {    if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))      return NarrowBOp; -  return SDValue(); -} - -static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements, -                                                 SDValue V, SelectionDAG &DAG) { -  SDLoc DL(V); -  EVT VT = V.getValueType(); - -  switch (V.getOpcode()) { -  default: -    return V; - -  case ISD::CONCAT_VECTORS: { -    EVT OpVT = V->getOperand(0).getValueType(); -    int OpSize = OpVT.getVectorNumElements(); -    SmallBitVector OpUsedElements(OpSize, false); -    bool FoundSimplification = false; -    SmallVector<SDValue, 4> NewOps; -    NewOps.reserve(V->getNumOperands()); -    for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) { -      SDValue Op = V->getOperand(i); -      bool OpUsed = false; -      for (int j = 0; j < OpSize; ++j) -        if (UsedElements[i * OpSize + j]) { -          OpUsedElements[j] = true; -          OpUsed = true; -        } -      NewOps.push_back( -          OpUsed ? 
simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG) -                 : DAG.getUNDEF(OpVT)); -      FoundSimplification |= Op == NewOps.back(); -      OpUsedElements.reset(); -    } -    if (FoundSimplification) -      V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps); -    return V; -  } - -  case ISD::INSERT_SUBVECTOR: { -    SDValue BaseV = V->getOperand(0); -    SDValue SubV = V->getOperand(1); -    auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2)); -    if (!IdxN) -      return V; - -    int SubSize = SubV.getValueType().getVectorNumElements(); -    int Idx = IdxN->getZExtValue(); -    bool SubVectorUsed = false; -    SmallBitVector SubUsedElements(SubSize, false); -    for (int i = 0; i < SubSize; ++i) -      if (UsedElements[i + Idx]) { -        SubVectorUsed = true; -        SubUsedElements[i] = true; -        UsedElements[i + Idx] = false; -      } - -    // Now recurse on both the base and sub vectors. -    SDValue SimplifiedSubV = -        SubVectorUsed -            ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG) -            : DAG.getUNDEF(SubV.getValueType()); -    SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG); -    if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV) -      V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, -                      SimplifiedBaseV, SimplifiedSubV, V->getOperand(2)); -    return V; -  } -  } -} - -static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, -                                       SDValue N1, SelectionDAG &DAG) { -  EVT VT = SVN->getValueType(0); -  int NumElts = VT.getVectorNumElements(); -  SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false); -  for (int M : SVN->getMask()) -    if (M >= 0 && M < NumElts) -      N0UsedElements[M] = true; -    else if (M >= NumElts) -      N1UsedElements[M - NumElts] = true; - -  SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG); -  SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG); -  if (S0 == N0 && S1 == N1) -    return SDValue(); - -  return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); -} - -static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0, -                                   SDValue N1, SelectionDAG &DAG) { -  auto isUndefElt = [](SDValue V, int Idx) { -    // TODO - handle more cases as required. -    if (V.getOpcode() == ISD::BUILD_VECTOR) -      return V.getOperand(Idx).isUndef(); -    if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) -      return (Idx != 0) || V.getOperand(0).isUndef(); -    return false; -  }; - -  EVT VT = SVN->getValueType(0); -  unsigned NumElts = VT.getVectorNumElements(); - -  bool Changed = false; -  SmallVector<int, 8> NewMask; -  for (unsigned i = 0; i != NumElts; ++i) { -    int Idx = SVN->getMaskElt(i); -    if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) || -        ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) { -      Changed = true; -      Idx = -1; -    } -    NewMask.push_back(Idx); -  } -  if (Changed) -    return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask); +  if (SimplifyDemandedVectorElts(SDValue(N, 0))) +    return SDValue(N, 0);    return SDValue();  } @@ -16028,10 +16812,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {        return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);    } -  // Simplify shuffle mask if a referenced element is UNDEF. 
-  if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG)) -    return V; -    if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))      return InsElt; @@ -16092,11 +16872,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {      }    } -  // There are various patterns used to build up a vector from smaller vectors, -  // subvectors, or elements. Scan chains of these and replace unused insertions -  // or components with undef. -  if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG)) -    return S; +  // Simplify source operands based on shuffle mask. +  if (SimplifyDemandedVectorElts(SDValue(N, 0))) +    return SDValue(N, 0);    // Match shuffles that can be converted to any_vector_extend_in_reg.    if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes)) @@ -16422,10 +17200,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {    if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {      SDValue CN0 = N0.getOperand(0);      SDValue CN1 = N1.getOperand(0); -    if (CN0.getValueType().getVectorElementType() == -            CN1.getValueType().getVectorElementType() && -        CN0.getValueType().getVectorNumElements() == -            VT.getVectorNumElements()) { +    EVT CN0VT = CN0.getValueType(); +    EVT CN1VT = CN1.getValueType(); +    if (CN0VT.isVector() && CN1VT.isVector() && +        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() && +        CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {        SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),                                        CN0.getValueType(), CN0, CN1, N2);        return DAG.getBitcast(VT, NewINSERT); @@ -16680,14 +17459,14 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,        const ConstantFPSDNode *Zero = nullptr;        if (TheSelect->getOpcode() == ISD::SELECT_CC) { -        CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); +        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();          CmpLHS = TheSelect->getOperand(0);          Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));        } else {          // SELECT or VSELECT          SDValue Cmp = TheSelect->getOperand(0);          if (Cmp.getOpcode() == ISD::SETCC) { -          CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); +          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();            CmpLHS = Cmp.getOperand(0);            Zero = isConstOrConstSplatFP(Cmp.getOperand(1));          } @@ -16905,24 +17684,6 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,      return !SCCC->isNullValue() ? N2 : N3;    } -  // Check to see if we can simplify the select into an fabs node -  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { -    // Allow either -0.0 or 0.0 -    if (CFP->isZero()) { -      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs -      if ((CC == ISD::SETGE || CC == ISD::SETGT) && -          N0 == N2 && N3.getOpcode() == ISD::FNEG && -          N2 == N3.getOperand(0)) -        return DAG.getNode(ISD::FABS, DL, VT, N0); - -      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs -      if ((CC == ISD::SETLT || CC == ISD::SETLE) && -          N0 == N3 && N2.getOpcode() == ISD::FNEG && -          N2.getOperand(0) == N3) -        return DAG.getNode(ISD::FABS, DL, VT, N3); -    } -  } -    // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 
0 : 4)"    // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0    // in it.  This is a win when the constant is not otherwise available because @@ -17400,19 +18161,34 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,              : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);        if (!Reciprocal) { -        // Unfortunately, Est is now NaN if the input was exactly 0.0. -        // Select out this case and force the answer to 0.0. +        // The estimate is now completely wrong if the input was exactly 0.0 or +        // possibly a denormal. Force the answer to 0.0 for those cases.          EVT VT = Op.getValueType();          SDLoc DL(Op); - -        SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);          EVT CCVT = getSetCCResultType(VT); -        SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); -        AddToWorklist(ZeroCmp.getNode()); - -        Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, -                          ZeroCmp, FPZero, Est); -        AddToWorklist(Est.getNode()); +        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; +        const Function &F = DAG.getMachineFunction().getFunction(); +        Attribute Denorms = F.getFnAttribute("denormal-fp-math"); +        if (Denorms.getValueAsString().equals("ieee")) { +          // fabs(X) < SmallestNormal ? 0.0 : Est +          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); +          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); +          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); +          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); +          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); +          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); +          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); +          AddToWorklist(Fabs.getNode()); +          AddToWorklist(IsDenorm.getNode()); +          AddToWorklist(Est.getNode()); +        } else { +          // X == 0.0 ? 
0.0 : Est +          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); +          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); +          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); +          AddToWorklist(IsZero.getNode()); +          AddToWorklist(Est.getNode()); +        }        }      }      return Est; @@ -17715,7 +18491,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {          Index = nullptr;          break;        } -    } // end while +    }// end while    }    // At this point, ChainedStores lists all of the Store nodes diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 3c856914053b..e4a9d557d386 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -61,7 +61,6 @@  #include "llvm/CodeGen/MachineModuleInfo.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/StackMaps.h"  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetLowering.h" @@ -99,6 +98,7 @@  #include "llvm/Support/Casting.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Target/TargetMachine.h" @@ -113,6 +113,11 @@ using namespace llvm;  #define DEBUG_TYPE "isel" +// FIXME: Remove this after the feature has proven reliable. +static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values", +                                     cl::init(true), cl::Hidden, +                                     cl::desc("Sink local values in FastISel")); +  STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "                                           "target-independent selector");  STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " @@ -120,9 +125,10 @@ STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "  STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");  /// Set the current block to which generated machine instructions will be -/// appended, and clear the local CSE map. +/// appended.  void FastISel::startNewBlock() { -  LocalValueMap.clear(); +  assert(LocalValueMap.empty() && +         "local values should be cleared after finishing a BB");    // Instructions are appended to FuncInfo.MBB. If the basic block already    // contains labels or copies, use the last instruction as the last local @@ -133,6 +139,9 @@ void FastISel::startNewBlock() {    LastLocalValue = EmitStartPt;  } +/// Flush the local CSE map and sink anything we can. +void FastISel::finishBasicBlock() { flushLocalValueMap(); } +  bool FastISel::lowerArguments() {    if (!FuncInfo.CanLowerReturn)      // Fallback to SDISel argument lowering code to deal with sret pointer @@ -153,11 +162,168 @@ bool FastISel::lowerArguments() {    return true;  } +/// Return the defined register if this instruction defines exactly one +/// virtual register and uses no other virtual registers. Otherwise return 0. 
+static unsigned findSinkableLocalRegDef(MachineInstr &MI) { +  unsigned RegDef = 0; +  for (const MachineOperand &MO : MI.operands()) { +    if (!MO.isReg()) +      continue; +    if (MO.isDef()) { +      if (RegDef) +        return 0; +      RegDef = MO.getReg(); +    } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { +      // This is another use of a vreg. Don't try to sink it. +      return 0; +    } +  } +  return RegDef; +} +  void FastISel::flushLocalValueMap() { +  // Try to sink local values down to their first use so that we can give them a +  // better debug location. This has the side effect of shrinking local value +  // live ranges, which helps out fast regalloc. +  if (SinkLocalValues && LastLocalValue != EmitStartPt) { +    // Sink local value materialization instructions between EmitStartPt and +    // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to +    // avoid inserting into the range that we're iterating over. +    MachineBasicBlock::reverse_iterator RE = +        EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt) +                    : FuncInfo.MBB->rend(); +    MachineBasicBlock::reverse_iterator RI(LastLocalValue); + +    InstOrderMap OrderMap; +    for (; RI != RE;) { +      MachineInstr &LocalMI = *RI; +      ++RI; +      bool Store = true; +      if (!LocalMI.isSafeToMove(nullptr, Store)) +        continue; +      unsigned DefReg = findSinkableLocalRegDef(LocalMI); +      if (DefReg == 0) +        continue; + +      sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap); +    } +  } +    LocalValueMap.clear();    LastLocalValue = EmitStartPt;    recomputeInsertPt();    SavedInsertPt = FuncInfo.InsertPt; +  LastFlushPoint = FuncInfo.InsertPt; +} + +static bool isRegUsedByPhiNodes(unsigned DefReg, +                                FunctionLoweringInfo &FuncInfo) { +  for (auto &P : FuncInfo.PHINodesToUpdate) +    if (P.second == DefReg) +      return true; +  return false; +} + +/// Build a map of instruction orders. Return the first terminator and its +/// order. Consider EH_LABEL instructions to be terminators as well, since local +/// values for phis after invokes must be materialized before the call. +void FastISel::InstOrderMap::initialize( +    MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) { +  unsigned Order = 0; +  for (MachineInstr &I : *MBB) { +    if (!FirstTerminator && +        (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) { +      FirstTerminator = &I; +      FirstTerminatorOrder = Order; +    } +    Orders[&I] = Order++; + +    // We don't need to order instructions past the last flush point. +    if (I.getIterator() == LastFlushPoint) +      break; +  } +} + +void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI, +                                             unsigned DefReg, +                                             InstOrderMap &OrderMap) { +  // If this register is used by a register fixup, MRI will not contain all +  // the uses until after register fixups, so don't attempt to sink or DCE +  // this instruction. Register fixups typically come from no-op cast +  // instructions, which replace the cast instruction vreg with the local +  // value vreg. +  if (FuncInfo.RegsWithFixups.count(DefReg)) +    return; + +  // We can DCE this instruction if there are no uses and it wasn't a +  // materialized for a successor PHI node. 
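The sinking path above numbers the block's instructions once (InstOrderMap) and then moves each local-value materialization down to just before its earliest user. A toy model of that lookup-and-reinsert step, with plain integers standing in for MachineInstr pointers (hypothetical names, not the FastISel API):

```cpp
#include <climits>
#include <cstdio>
#include <list>
#include <unordered_map>

// Toy block: instructions identified by id, in program order. Uses lists
// which ids consume the value produced by DefId.
int main() {
  std::list<int> Block = {10, 11, 12, 13, 14};
  int DefId = 10; // the local value materialization to sink
  std::unordered_map<int, bool> Uses = {{13, true}, {14, true}};

  // Number the instructions once (the InstOrderMap::initialize step).
  std::unordered_map<int, unsigned> Order;
  unsigned N = 0;
  for (int Id : Block)
    Order[Id] = N++;

  // Find the earliest user, as sinkLocalValueMaterialization does.
  unsigned FirstOrder = UINT_MAX;
  int FirstUser = -1;
  for (auto &U : Uses)
    if (Order[U.first] < FirstOrder) {
      FirstOrder = Order[U.first];
      FirstUser = U.first;
    }

  // Re-insert the def immediately before its first user.
  Block.remove(DefId);
  for (auto It = Block.begin(); It != Block.end(); ++It)
    if (*It == FirstUser) {
      Block.insert(It, DefId);
      break;
    }

  for (int Id : Block)
    std::printf("%d ", Id); // 11 12 10 13 14
  std::printf("\n");
}
```

Numbering once and reusing the map keeps repeated queries cheap, which is why the real code caches OrderMap across sink candidates within a flush.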
+  bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo); +  if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) { +    if (EmitStartPt == &LocalMI) +      EmitStartPt = EmitStartPt->getPrevNode(); +    LLVM_DEBUG(dbgs() << "removing dead local value materialization " +                      << LocalMI); +    OrderMap.Orders.erase(&LocalMI); +    LocalMI.eraseFromParent(); +    return; +  } + +  // Number the instructions if we haven't yet so we can efficiently find the +  // earliest use. +  if (OrderMap.Orders.empty()) +    OrderMap.initialize(FuncInfo.MBB, LastFlushPoint); + +  // Find the first user in the BB. +  MachineInstr *FirstUser = nullptr; +  unsigned FirstOrder = std::numeric_limits<unsigned>::max(); +  for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) { +    auto I = OrderMap.Orders.find(&UseInst); +    assert(I != OrderMap.Orders.end() && +           "local value used by instruction outside local region"); +    unsigned UseOrder = I->second; +    if (UseOrder < FirstOrder) { +      FirstOrder = UseOrder; +      FirstUser = &UseInst; +    } +  } + +  // The insertion point will be the first terminator or the first user, +  // whichever came first. If there was no terminator, this must be a +  // fallthrough block and the insertion point is the end of the block. +  MachineBasicBlock::instr_iterator SinkPos; +  if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) { +    FirstOrder = OrderMap.FirstTerminatorOrder; +    SinkPos = OrderMap.FirstTerminator->getIterator(); +  } else if (FirstUser) { +    SinkPos = FirstUser->getIterator(); +  } else { +    assert(UsedByPHI && "must be users if not used by a phi"); +    SinkPos = FuncInfo.MBB->instr_end(); +  } + +  // Collect all DBG_VALUEs before the new insertion position so that we can +  // sink them. +  SmallVector<MachineInstr *, 1> DbgValues; +  for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) { +    if (!DbgVal.isDebugValue()) +      continue; +    unsigned UseOrder = OrderMap.Orders[&DbgVal]; +    if (UseOrder < FirstOrder) +      DbgValues.push_back(&DbgVal); +  } + +  // Sink LocalMI before SinkPos and assign it the same DebugLoc. +  LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI); +  FuncInfo.MBB->remove(&LocalMI); +  FuncInfo.MBB->insert(SinkPos, &LocalMI); +  if (SinkPos != FuncInfo.MBB->end()) +    LocalMI.setDebugLoc(SinkPos->getDebugLoc()); + +  // Sink any debug values that we've collected. +  for (MachineInstr *DI : DbgValues) { +    FuncInfo.MBB->remove(DI); +    FuncInfo.MBB->insert(SinkPos, DI); +  }  }  bool FastISel::hasTrivialKill(const Value *V) { @@ -328,8 +494,10 @@ void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {      AssignedReg = Reg;    else if (Reg != AssignedReg) {      // Arrange for uses of AssignedReg to be replaced by uses of Reg. -    for (unsigned i = 0; i < NumRegs; i++) +    for (unsigned i = 0; i < NumRegs; i++) {        FuncInfo.RegFixups[AssignedReg + i] = Reg + i; +      FuncInfo.RegsWithFixups.insert(Reg + i); +    }      AssignedReg = Reg;    } @@ -681,7 +849,7 @@ bool FastISel::selectStackmap(const CallInst *I) {    return true;  } -/// \brief Lower an argument list according to the target calling convention. +/// Lower an argument list according to the target calling convention.  ///  /// This is a helper for lowering intrinsics that follow a target calling  /// convention or require stack pointer adjustment. 
Only a subset of the @@ -702,7 +870,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,      ArgListEntry Entry;      Entry.Val = V;      Entry.Ty = V->getType(); -    Entry.setAttributes(&CS, ArgIdx); +    Entry.setAttributes(&CS, ArgI);      Args.push_back(Entry);    } @@ -874,10 +1042,31 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {                TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));    for (auto &MO : Ops)      MIB.add(MO); +    // Insert the Patchable Event Call instruction, that gets lowered properly.    return true;  } +bool FastISel::selectXRayTypedEvent(const CallInst *I) { +  const auto &Triple = TM.getTargetTriple(); +  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) +    return true; // don't do anything to this instruction. +  SmallVector<MachineOperand, 8> Ops; +  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)), +                                          /*IsDef=*/false)); +  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)), +                                          /*IsDef=*/false)); +  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)), +                                          /*IsDef=*/false)); +  MachineInstrBuilder MIB = +      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, +              TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL)); +  for (auto &MO : Ops) +    MIB.add(MO); + +  // Insert the Patchable Typed Event Call instruction, that gets lowered properly. +  return true; +}  /// Returns an AttributeList representing the attributes applied to the return  /// value of the given call. @@ -1141,13 +1330,13 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {      const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);      assert(DI->getVariable() && "Missing variable");      if (!FuncInfo.MF->getMMI().hasDebugInfo()) { -      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); +      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");        return true;      }      const Value *Address = DI->getAddress();      if (!Address || isa<UndefValue>(Address)) { -      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); +      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");        return true;      } @@ -1182,24 +1371,15 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {      if (Op) {        assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&               "Expected inlined-at fields to agree"); -      if (Op->isReg()) { -        Op->setIsDebug(true); -        // A dbg.declare describes the address of a source variable, so lower it -        // into an indirect DBG_VALUE. -        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, -                TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, -                Op->getReg(), DI->getVariable(), DI->getExpression()); -      } else -        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, -                TII.get(TargetOpcode::DBG_VALUE)) -            .add(*Op) -            .addImm(0) -            .addMetadata(DI->getVariable()) -            .addMetadata(DI->getExpression()); +      // A dbg.declare describes the address of a source variable, so lower it +      // into an indirect DBG_VALUE. 
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, +              TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, +              *Op, DI->getVariable(), DI->getExpression());      } else {        // We can't yet handle anything else here because it would require        // generating code, thus altering codegen because of debug info. -      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); +      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");      }      return true;    } @@ -1242,7 +1422,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {      } else {        // We can't yet handle anything else here because it would require        // generating code, thus altering codegen because of debug info. -      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); +      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");      }      return true;    } @@ -1256,7 +1436,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {      updateValueMap(II, ResultReg);      return true;    } -  case Intrinsic::invariant_group_barrier: +  case Intrinsic::launder_invariant_group: +  case Intrinsic::strip_invariant_group:    case Intrinsic::expect: {      unsigned ResultReg = getRegForValue(II->getArgOperand(0));      if (!ResultReg) @@ -1272,6 +1453,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {    case Intrinsic::xray_customevent:      return selectXRayCustomEvent(II); +  case Intrinsic::xray_typedevent: +    return selectXRayTypedEvent(II);    }    return fastLowerIntrinsicCall(II); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 81347fa4bd46..42c7181dac41 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -23,6 +23,7 @@  #include "llvm/CodeGen/TargetLowering.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/WasmEHFuncInfo.h"  #include "llvm/CodeGen/WinEHFuncInfo.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/DerivedTypes.h" @@ -118,6 +119,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,        }      }    } +  if (Personality == EHPersonality::Wasm_CXX) { +    WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); +    calculateWasmEHInfo(&fn, EHInfo); +  }    // Initialize the mapping of values to registers.  This is only set up for    // instruction values that are used outside of the block that defines @@ -226,9 +231,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,        const Instruction *PadInst = BB.getFirstNonPHI();        // If this is a non-landingpad EH pad, mark this function as using        // funclets. -      // FIXME: SEH catchpads do not create funclets, so we could avoid setting -      // this in such cases in order to improve frame layout. +      // FIXME: SEH catchpads do not create EH scope/funclets, so we could avoid +      // setting this in such cases in order to improve frame layout.        
if (!isa<LandingPadInst>(PadInst)) { +        MF->setHasEHScopes(true);          MF->setHasEHFunclets(true);          MF->getFrameInfo().setHasOpaqueSPAdjustment(true);        } @@ -281,28 +287,46 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,      }    } -  if (!isFuncletEHPersonality(Personality)) -    return; - -  WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); +  if (isFuncletEHPersonality(Personality)) { +    WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); -  // Map all BB references in the WinEH data to MBBs. -  for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { -    for (WinEHHandlerType &H : TBME.HandlerArray) { -      if (H.Handler) -        H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()]; +    // Map all BB references in the WinEH data to MBBs. +    for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { +      for (WinEHHandlerType &H : TBME.HandlerArray) { +        if (H.Handler) +          H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()]; +      } +    } +    for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap) +      if (UME.Cleanup) +        UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()]; +    for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) { +      const auto *BB = UME.Handler.get<const BasicBlock *>(); +      UME.Handler = MBBMap[BB]; +    } +    for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) { +      const auto *BB = CME.Handler.get<const BasicBlock *>(); +      CME.Handler = MBBMap[BB];      }    } -  for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap) -    if (UME.Cleanup) -      UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()]; -  for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) { -    const BasicBlock *BB = UME.Handler.get<const BasicBlock *>(); -    UME.Handler = MBBMap[BB]; -  } -  for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) { -    const BasicBlock *BB = CME.Handler.get<const BasicBlock *>(); -    CME.Handler = MBBMap[BB]; + +  else if (Personality == EHPersonality::Wasm_CXX) { +    WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); +    // Map all BB references in the WinEH data to MBBs. 
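The Wasm branch that follows rewrites both unwind maps by translating every key and value through MBBMap. The pattern in isolation, with strings standing in for block pointers (a standalone model, not the WasmEHFuncInfo types):

```cpp
#include <cstdio>
#include <map>
#include <string>

// Standalone model: rewrite every key and value of an unwind map through a
// BasicBlock -> MachineBasicBlock translation table, the same shape as the
// EHPadUnwindMap/ThrowUnwindMap rewrite below.
int main() {
  std::map<std::string, std::string> BBToMBB = {
      {"pad", "mbb.pad"}, {"dest", "mbb.dest"}};
  std::map<std::string, std::string> UnwindMap = {{"pad", "dest"}};

  std::map<std::string, std::string> NewMap;
  for (auto &KV : UnwindMap)
    NewMap[BBToMBB[KV.first]] = BBToMBB[KV.second];
  UnwindMap = std::move(NewMap);

  for (auto &KV : UnwindMap)
    std::printf("%s -> %s\n", KV.first.c_str(), KV.second.c_str());
}
```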
+    DenseMap<BBOrMBB, BBOrMBB> NewMap; +    for (auto &KV : EHInfo.EHPadUnwindMap) { +      const auto *Src = KV.first.get<const BasicBlock *>(); +      const auto *Dst = KV.second.get<const BasicBlock *>(); +      NewMap[MBBMap[Src]] = MBBMap[Dst]; +    } +    EHInfo.EHPadUnwindMap = std::move(NewMap); +    NewMap.clear(); +    for (auto &KV : EHInfo.ThrowUnwindMap) { +      const auto *Src = KV.first.get<const BasicBlock *>(); +      const auto *Dst = KV.second.get<const BasicBlock *>(); +      NewMap[MBBMap[Src]] = MBBMap[Dst]; +    } +    EHInfo.ThrowUnwindMap = std::move(NewMap);    }  } @@ -312,12 +336,14 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,  void FunctionLoweringInfo::clear() {    MBBMap.clear();    ValueMap.clear(); +  VirtReg2Value.clear();    StaticAllocaMap.clear();    LiveOutRegInfo.clear();    VisitedBBs.clear();    ArgDbgValues.clear();    ByValArgFrameIndexMap.clear();    RegFixups.clear(); +  RegsWithFixups.clear();    StatepointStackSlots.clear();    StatepointSpillMaps.clear();    PreferredExtendType.clear(); @@ -483,7 +509,7 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {    auto I = ByValArgFrameIndexMap.find(A);    if (I != ByValArgFrameIndexMap.end())      return I->second; -  DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); +  LLVM_DEBUG(dbgs() << "Argument does not have assigned frame index!\n");    return INT_MAX;  } @@ -547,3 +573,13 @@ FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const    }    return std::make_pair(It->second, false);  } + +const Value * +FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) { +  if (VirtReg2Value.empty()) { +    for (auto &P : ValueMap) { +      VirtReg2Value[P.second] = P.first; +    } +  } +  return VirtReg2Value[Vreg]; +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index cc9b41b4b487..d6171f3177d7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -394,11 +394,26 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,    } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {      MIB.addFPImm(F->getConstantFPValue());    } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { +    unsigned VReg = R->getReg(); +    MVT OpVT = Op.getSimpleValueType(); +    const TargetRegisterClass *OpRC = +        TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr; +    const TargetRegisterClass *IIRC = +        II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF)) +           : nullptr; + +    if (OpRC && IIRC && OpRC != IIRC && +        TargetRegisterInfo::isVirtualRegister(VReg)) { +      unsigned NewVReg = MRI->createVirtualRegister(IIRC); +      BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), +               TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); +      VReg = NewVReg; +    }      // Turn additional physreg operands into implicit uses on non-variadic      // instructions. This is used by call and return instructions passing      // arguments in registers.      
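The FunctionLoweringInfo change above adds getValueFromVirtualReg, which builds the reverse VReg-to-Value view only on the first query instead of maintaining it eagerly. A reduced sketch of that lazy-inversion pattern (plain unordered_map and string keys, invented for illustration):

```cpp
#include <cstdio>
#include <unordered_map>

// Standalone model of the lazy inverse map in getValueFromVirtualReg: the
// Value -> VReg map is populated up front; the VReg -> Value view is
// materialized on first use.
struct LoweringInfo {
  std::unordered_map<const char *, unsigned> ValueMap;   // Value -> vreg
  std::unordered_map<unsigned, const char *> VReg2Value; // built lazily

  const char *getValueFromVirtualReg(unsigned VReg) {
    if (VReg2Value.empty())
      for (auto &P : ValueMap)
        VReg2Value[P.second] = P.first;
    auto It = VReg2Value.find(VReg);
    return It == VReg2Value.end() ? nullptr : It->second;
  }
};

int main() {
  LoweringInfo FLI;
  FLI.ValueMap = {{"x", 1}, {"y", 2}};
  std::printf("%s\n", FLI.getValueFromVirtualReg(2)); // y
}
```

Inverting once is sound here because the map does not change between queries; per the diff, VirtReg2Value is cleared wholesale in FunctionLoweringInfo::clear().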
bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); -    MIB.addReg(R->getReg(), getImplRegState(Imp)); +    MIB.addReg(VReg, getImplRegState(Imp));    } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {      MIB.addRegMask(RM->getRegMask());    } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { @@ -682,11 +697,15 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,    if (SD->getKind() == SDDbgValue::FRAMEIX) {      // Stack address; this needs to be lowered in target-dependent fashion.      // EmitTargetCodeForFrameDebugValue is responsible for allocation. -    return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) -        .addFrameIndex(SD->getFrameIx()) -        .addImm(0) -        .addMetadata(Var) -        .addMetadata(Expr); +    auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) +                       .addFrameIndex(SD->getFrameIx()); +    if (SD->isIndirect()) +      // Push [fi + 0] onto the DIExpression stack. +      FrameMI.addImm(0); +    else +      // Push fi onto the DIExpression stack. +      FrameMI.addReg(0); +    return FrameMI.addMetadata(Var).addMetadata(Expr);    }    // Otherwise, we're going to create an instruction here.    const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); @@ -705,6 +724,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,      else        AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,                   /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); +  } else if (SD->getKind() == SDDbgValue::VREG) { +    MIB.addReg(SD->getVReg(), RegState::Debug);    } else if (SD->getKind() == SDDbgValue::CONST) {      const Value *V = SD->getConst();      if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { @@ -736,6 +757,20 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,    return &*MIB;  } +MachineInstr * +InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) { +  MDNode *Label = SD->getLabel(); +  DebugLoc DL = SD->getDebugLoc(); +  assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && +         "Expected inlined-at fields to agree"); + +  const MCInstrDesc &II = TII->get(TargetOpcode::DBG_LABEL); +  MachineInstrBuilder MIB = BuildMI(*MF, DL, II); +  MIB.addMetadata(Label); + +  return &*MIB; +} +  /// EmitMachineNode - Generate machine code for a target-specific node and  /// needed dependencies.  /// @@ -807,9 +842,34 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,    // Add result register values for things that are defined by this    // instruction. -  if (NumResults) +  if (NumResults) {      CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap); +    // Transfer any IR flags from the SDNode to the MachineInstr +    MachineInstr *MI = MIB.getInstr(); +    const SDNodeFlags Flags = Node->getFlags(); +    if (Flags.hasNoSignedZeros()) +      MI->setFlag(MachineInstr::MIFlag::FmNsz); + +    if (Flags.hasAllowReciprocal()) +      MI->setFlag(MachineInstr::MIFlag::FmArcp); + +    if (Flags.hasNoNaNs()) +      MI->setFlag(MachineInstr::MIFlag::FmNoNans); + +    if (Flags.hasNoInfs()) +      MI->setFlag(MachineInstr::MIFlag::FmNoInfs); + +    if (Flags.hasAllowContract()) +      MI->setFlag(MachineInstr::MIFlag::FmContract); + +    if (Flags.hasApproximateFuncs()) +      MI->setFlag(MachineInstr::MIFlag::FmAfn); + +    if (Flags.hasAllowReassociation()) +      MI->setFlag(MachineInstr::MIFlag::FmReassoc); +  } +    // Emit all of the actual operands of this instruction, adding them to the    // instruction as appropriate.    
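The hunk just above copies IR-level fast-math properties from the SDNode onto the emitted MachineInstr, one conditional per flag. A reduced model of that one-to-one mapping, with two invented bitmask enums in place of SDNodeFlags and MachineInstr::MIFlag:

```cpp
#include <cstdio>

// Reduced model of the SDNodeFlags -> MIFlag transfer: each source property
// maps to exactly one target flag bit.
enum NodeFlags : unsigned {
  NF_NoNaNs = 1 << 0,
  NF_NoInfs = 1 << 1,
  NF_Reassoc = 1 << 2,
};
enum MIFlags : unsigned {
  MI_FmNoNans = 1 << 0,
  MI_FmNoInfs = 1 << 1,
  MI_FmReassoc = 1 << 2,
};

static unsigned transferFlags(unsigned NF) {
  unsigned MI = 0;
  if (NF & NF_NoNaNs)
    MI |= MI_FmNoNans;
  if (NF & NF_NoInfs)
    MI |= MI_FmNoInfs;
  if (NF & NF_Reassoc)
    MI |= MI_FmReassoc;
  return MI;
}

int main() {
  std::printf("0x%x\n", transferFlags(NF_NoNaNs | NF_Reassoc)); // 0x5
}
```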
bool HasOptPRefs = NumDefs > NumResults; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index 8a8a1bbd18f7..701b6368690b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -113,6 +113,9 @@ public:    MachineInstr *EmitDbgValue(SDDbgValue *SD,                               DenseMap<SDValue, unsigned> &VRBaseMap); +  /// Generate machine instruction for a dbg_label node. +  MachineInstr *EmitDbgLabel(SDDbgLabel *SD); +    /// EmitNode - Generate machine code for a node and needed dependencies.    ///    void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3a2fb0c0a836..2b7ba1ffb309 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -22,7 +22,6 @@  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineJumpTableInfo.h"  #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/RuntimeLibcalls.h"  #include "llvm/CodeGen/SelectionDAG.h"  #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -41,6 +40,7 @@  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Target/TargetMachine.h" @@ -87,11 +87,11 @@ class SelectionDAGLegalize {    const TargetLowering &TLI;    SelectionDAG &DAG; -  /// \brief The set of nodes which have already been legalized. We hold a +  /// The set of nodes which have already been legalized. We hold a    /// reference to it in order to update as necessary on node deletion.    SmallPtrSetImpl<SDNode *> &LegalizedNodes; -  /// \brief A set of all the nodes updated during legalization. +  /// A set of all the nodes updated during legalization.    SmallSetVector<SDNode *, 16> *UpdatedNodes;    EVT getSetCCResultType(EVT VT) const { @@ -107,7 +107,7 @@ public:        : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG),          LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {} -  /// \brief Legalizes the given operation. +  /// Legalizes the given operation.    void LegalizeOp(SDNode *Node);  private: @@ -167,7 +167,7 @@ private:                            SDValue NewIntValue) const;    SDValue ExpandFCOPYSIGN(SDNode *Node) const;    SDValue ExpandFABS(SDNode *Node) const; -  SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, +  SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT,                                 const SDLoc &dl);    SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,                                  const SDLoc &dl); @@ -200,8 +200,8 @@ public:    }    void ReplaceNode(SDNode *Old, SDNode *New) { -    DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); -          dbgs() << "     with:      "; New->dump(&DAG)); +    LLVM_DEBUG(dbgs() << " ... 
replacing: "; Old->dump(&DAG); +               dbgs() << "     with:      "; New->dump(&DAG));      assert(Old->getNumValues() == New->getNumValues() &&             "Replacing one node with another that produces a different number " @@ -213,8 +213,8 @@ public:    }    void ReplaceNode(SDValue Old, SDValue New) { -    DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); -          dbgs() << "     with:      "; New->dump(&DAG)); +    LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); +               dbgs() << "     with:      "; New->dump(&DAG));      DAG.ReplaceAllUsesWith(Old, New);      if (UpdatedNodes) @@ -223,13 +223,12 @@ public:    }    void ReplaceNode(SDNode *Old, const SDValue *New) { -    DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); +    LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));      DAG.ReplaceAllUsesWith(Old, New);      for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { -      DEBUG(dbgs() << (i == 0 ? "     with:      " -                              : "      and:      "); -            New[i]->dump(&DAG)); +      LLVM_DEBUG(dbgs() << (i == 0 ? "     with:      " : "      and:      "); +                 New[i]->dump(&DAG));        if (UpdatedNodes)          UpdatedNodes->insert(New[i].getNode());      } @@ -408,7 +407,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,  }  SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { -  DEBUG(dbgs() << "Optimizing float store operations\n"); +  LLVM_DEBUG(dbgs() << "Optimizing float store operations\n");    // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'    // FIXME: We shouldn't do this for TargetConstantFP's.    // FIXME: move this to the DAG Combiner!  Note that we can't regress due @@ -477,7 +476,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {    AAMDNodes AAInfo = ST->getAAInfo();    if (!ST->isTruncatingStore()) { -    DEBUG(dbgs() << "Legalizing store operation\n"); +    LLVM_DEBUG(dbgs() << "Legalizing store operation\n");      if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {        ReplaceNode(ST, OptStore);        return; @@ -495,15 +494,15 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {        unsigned Align = ST->getAlignment();        const DataLayout &DL = DAG.getDataLayout();        if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { -        DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); +        LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n");          SDValue Result = TLI.expandUnalignedStore(ST, DAG);          ReplaceNode(SDValue(ST, 0), Result);        } else -        DEBUG(dbgs() << "Legal store\n"); +        LLVM_DEBUG(dbgs() << "Legal store\n");        break;      }      case TargetLowering::Custom: { -      DEBUG(dbgs() << "Trying custom lowering\n"); +      LLVM_DEBUG(dbgs() << "Trying custom lowering\n");        SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);        if (Res && Res != SDValue(Node, 0))          ReplaceNode(SDValue(Node, 0), Res); @@ -524,7 +523,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {      return;    } -  DEBUG(dbgs() << "Legalizing truncating store operations\n"); +  LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n");    SDValue Value = ST->getValue();    EVT StVT = ST->getMemoryVT();    unsigned StWidth = StVT.getSizeInBits(); @@ -656,7 +655,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {    ISD::LoadExtType ExtType = 
LD->getExtensionType();    if (ExtType == ISD::NON_EXTLOAD) { -    DEBUG(dbgs() << "Legalizing non-extending load operation\n"); +    LLVM_DEBUG(dbgs() << "Legalizing non-extending load operation\n");      MVT VT = Node->getSimpleValueType(0);      SDValue RVal = SDValue(Node, 0);      SDValue RChain = SDValue(Node, 1); @@ -706,7 +705,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {      return;    } -  DEBUG(dbgs() << "Legalizing extending load operation\n"); +  LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n");    EVT SrcVT = LD->getMemoryVT();    unsigned SrcWidth = SrcVT.getSizeInBits();    unsigned Alignment = LD->getAlignment(); @@ -947,39 +946,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {    }  } -static TargetLowering::LegalizeAction -getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) { -  unsigned EqOpc; -  switch (Opcode) { -    default: llvm_unreachable("Unexpected FP pseudo-opcode"); -    case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; -    case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; -    case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; -    case ISD::STRICT_FMA: EqOpc = ISD::FMA; break; -    case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; -    case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; -    case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; -    case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break; -    case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; -    case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; -    case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; -    case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; -    case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; -  } - -  auto Action = TLI.getOperationAction(EqOpc, VT); - -  // We don't currently handle Custom or Promote for strict FP pseudo-ops. -  // For now, we just expand for those cases. -  if (Action != TargetLowering::Legal) -    Action = TargetLowering::Expand; - -  return Action; -} -  /// Return a legal replacement for the given operation, with all legal operands.  void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { -  DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); +  LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));    // Allow illegal target nodes and illegal registers.    if (Node->getOpcode() == ISD::TargetConstant || @@ -1043,8 +1012,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {    case ISD::SETCC:    case ISD::BR_CC: {      unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : -                         Node->getOpcode() == ISD::SETCC ? 2 : -                         Node->getOpcode() == ISD::SETCCE ? 3 : 1; +                         Node->getOpcode() == ISD::SETCC ? 2 : 1;      unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;      MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();      ISD::CondCode CCCode = @@ -1122,6 +1090,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {        return;      }      break; +  case ISD::STRICT_FADD: +  case ISD::STRICT_FSUB: +  case ISD::STRICT_FMUL: +  case ISD::STRICT_FDIV:    case ISD::STRICT_FSQRT:    case ISD::STRICT_FMA:    case ISD::STRICT_FPOW: @@ -1139,8 +1111,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {      // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT      // is also legal, but if ISD::FSQRT requires expansion then so does      // ISD::STRICT_FSQRT. 
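The removed local helper is subsumed by TLI.getStrictFPOperationAction, but the policy it implemented is unchanged: a STRICT_* pseudo-op inherits the action of its non-strict twin, and anything other than Legal is treated as Expand. A freestanding restatement of that rule (abbreviated enums, not the LLVM definitions):

```cpp
#include <cassert>
#include <cstdio>

// Standalone restatement of the strict-FP legality rule: a STRICT_* node is
// as legal as its non-strict twin, and Custom/Promote are not yet handled
// for strict ops, so anything not Legal becomes Expand.
enum Opcode { FSQRT, STRICT_FSQRT, FSIN, STRICT_FSIN };
enum Action { Legal, Custom, Promote, Expand };

static Opcode getNonStrictTwin(Opcode Opc) {
  switch (Opc) {
  case STRICT_FSQRT: return FSQRT;
  case STRICT_FSIN:  return FSIN;
  default: assert(false && "not a strict opcode"); return Opc;
  }
}

// Stand-in for TLI.getOperationAction on a fixed type.
static Action getOperationAction(Opcode Opc) {
  return Opc == FSQRT ? Legal : Expand;
}

static Action getStrictFPOperationAction(Opcode Opc) {
  Action A = getOperationAction(getNonStrictTwin(Opc));
  return A == Legal ? Legal : Expand;
}

int main() {
  std::printf("STRICT_FSQRT -> %s\n",
              getStrictFPOperationAction(STRICT_FSQRT) == Legal ? "Legal"
                                                                : "Expand");
}
```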
-    Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(), -                                     Node->getValueType(0)); +    Action = TLI.getStrictFPOperationAction(Node->getOpcode(), +                                            Node->getValueType(0));      break;    default:      if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { @@ -1202,10 +1174,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {      }      switch (Action) {      case TargetLowering::Legal: -      DEBUG(dbgs() << "Legal node: nothing to do\n"); +      LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");        return;      case TargetLowering::Custom: -      DEBUG(dbgs() << "Trying custom legalization\n"); +      LLVM_DEBUG(dbgs() << "Trying custom legalization\n");        // FIXME: The handling for custom lowering with multiple results is        // a complete mess.        if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { @@ -1213,7 +1185,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {            return;          if (Node->getNumValues() == 1) { -          DEBUG(dbgs() << "Successfully custom legalized node\n"); +          LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");            // We can just directly replace this node with the lowered value.            ReplaceNode(SDValue(Node, 0), Res);            return; @@ -1222,11 +1194,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {          SmallVector<SDValue, 8> ResultVals;          for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)            ResultVals.push_back(Res.getValue(i)); -        DEBUG(dbgs() << "Successfully custom legalized node\n"); +        LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");          ReplaceNode(Node, ResultVals.data());          return;        } -      DEBUG(dbgs() << "Could not custom legalize node\n"); +      LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");        LLVM_FALLTHROUGH;      case TargetLowering::Expand:        if (ExpandNode(Node)) @@ -1623,6 +1595,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,    MVT OpVT = LHS.getSimpleValueType();    ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();    NeedInvert = false; +  bool NeedSwap = false;    switch (TLI.getCondCodeAction(CCCode, OpVT)) {    default: llvm_unreachable("Unknown condition code action!");    case TargetLowering::Legal: @@ -1630,23 +1603,37 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,      break;    case TargetLowering::Expand: {      ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); -    if (TLI.isCondCodeLegal(InvCC, OpVT)) { +    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {        std::swap(LHS, RHS);        CC = DAG.getCondCode(InvCC);        return true;      } +    // Swapping operands didn't work. Try inverting the condition. +    InvCC = getSetCCInverse(CCCode, OpVT.isInteger()); +    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { +      // If inverting the condition is not enough, try swapping operands +      // on top of it. 
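The expand path here now walks a small ladder: swapped condition, then inverted, then inverted-and-swapped, recording NeedInvert/NeedSwap so the caller can fix up operands and result. A self-contained model of that search over a toy condition-code set (isLegal is faked with one predicate; LLVM consults TLI.isCondCodeLegalOrCustom):

```cpp
#include <cstdio>

// Standalone model of the SETCC legalization ladder: try the swapped
// condition, then the inverse, then the swapped inverse, before giving up.
enum Cond { LT, GT, GE, LE };

static Cond swapped(Cond C) { // a<b  <=>  b>a
  switch (C) { case LT: return GT; case GT: return LT;
               case GE: return LE; default: return GE; }
}
static Cond inverse(Cond C) { // !(a<b) <=> a>=b
  switch (C) { case LT: return GE; case GE: return LT;
               case GT: return LE; default: return GT; }
}
static bool isLegal(Cond C) { return C == LT; } // pretend only LT exists

// Returns true on success; NeedInvert/NeedSwap tell the caller how to
// interpret the legal condition and feed its operands.
static bool legalize(Cond &C, bool &NeedInvert, bool &NeedSwap) {
  NeedInvert = NeedSwap = false;
  if (isLegal(C)) return true;
  if (isLegal(swapped(C))) { C = swapped(C); NeedSwap = true; return true; }
  if (isLegal(inverse(C))) { C = inverse(C); NeedInvert = true; return true; }
  Cond C2 = swapped(inverse(C));
  if (isLegal(C2)) { C = C2; NeedInvert = NeedSwap = true; return true; }
  return false;
}

int main() {
  Cond C = LE; bool Inv, Swp;
  bool OK = legalize(C, Inv, Swp);
  // a<=b  ==  !(b<a): expect cond=LT with invert and swap both set.
  std::printf("ok=%d cond=%d invert=%d swap=%d\n", OK, C, Inv, Swp);
}
```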
+      InvCC = ISD::getSetCCSwappedOperands(InvCC); +      NeedSwap = true; +    } +    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { +      CC = DAG.getCondCode(InvCC); +      NeedInvert = true; +      if (NeedSwap) +        std::swap(LHS, RHS); +      return true; +    } +      ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;      unsigned Opc = 0;      switch (CCCode) {      default: llvm_unreachable("Don't know how to expand this condition!");      case ISD::SETO: -        assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT) -            == TargetLowering::Legal +        assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)              && "If SETO is expanded, SETOEQ must be legal!");          CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;      case ISD::SETUO: -        assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT) -            == TargetLowering::Legal +        assert(TLI.isCondCodeLegal(ISD::SETUNE, OpVT)              && "If SETUO is expanded, SETUNE must be legal!");          CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR;  break;      case ISD::SETOEQ: @@ -1676,20 +1663,10 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,      case ISD::SETGT:      case ISD::SETGE:      case ISD::SETLT: -      // We only support using the inverted operation, which is computed above -      // and not a different manner of supporting expanding these cases. -      llvm_unreachable("Don't know how to expand this condition!");      case ISD::SETNE:      case ISD::SETEQ: -      // Try inverting the result of the inverse condition. -      InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ; -      if (TLI.isCondCodeLegal(InvCC, OpVT)) { -        CC = DAG.getCondCode(InvCC); -        NeedInvert = true; -        return true; -      } -      // If inverting the condition didn't work then we have no means to expand -      // the condition. +      // If all combinations of inverting the condition and swapping operands +      // didn't work then we have no means to expand the condition.        llvm_unreachable("Don't know how to expand this condition!");      } @@ -2036,12 +2013,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,    std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);    if (!CallInfo.second.getNode()) { -    DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); +    LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump());      // It's a tailcall, return the chain (which is the DAG root).      return DAG.getRoot();    } -  DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); +  LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump());    return CallInfo.first;  } @@ -2327,10 +2304,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,                                                     EVT DestVT,                                                     const SDLoc &dl) {    // TODO: Should any fast-math-flags be set for the created nodes? -  DEBUG(dbgs() << "Legalizing INT_TO_FP\n"); +  LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");    if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { -    DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " -                    "expansion\n"); +    LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " +                         "expansion\n");      // Get the stack frame index of a 8 byte buffer.      
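That 8-byte buffer is the heart of the 32-bit INT_TO_FP expansion: the word pair (0x43300000, x) is the IEEE-754 encoding of 2^52 + x, so one load and one FP subtract recover the converted value. The arithmetic in freestanding C++ for the unsigned case (per the code above, the signed variant XORs the input with 2^31 and subtracts 2^52 + 2^31 instead):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Place the exponent pattern of 2^52 in the high word and the integer in
// the low word, reinterpret as double, then subtract 2^52. The memcpy
// models the stack-slot store/load the DAG expansion emits.
static double u32ToDouble(uint32_t X) {
  uint64_t Bits = (UINT64_C(0x43300000) << 32) | X; // double 2^52 + X
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D - 4503599627370496.0; // subtract 2^52
}

int main() {
  std::printf("%f %f\n", u32ToDouble(0), u32ToDouble(4000000000u));
}
```

The trick is exact because every 32-bit integer fits in the 52-bit mantissa at that exponent, so no rounding occurs in either the OR or the subtract.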
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); @@ -2395,7 +2372,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,    // and in all alternate rounding modes.    // TODO: Generalize this for use with other types.    if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { -    DEBUG(dbgs() << "Converting unsigned i64 to f64\n"); +    LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f64\n");      SDValue TwoP52 =        DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64);      SDValue TwoP84PlusTwoP52 = @@ -2418,7 +2395,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,    // TODO: Generalize this for use with other types.    if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { -    DEBUG(dbgs() << "Converting unsigned i64 to f32\n"); +    LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f32\n");      // For unsigned conversions, convert them to signed conversions using the      // algorithm from the x86_64 __floatundidf in compiler_rt.      if (!isSigned) { @@ -2853,7 +2830,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,  }  bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { -  DEBUG(dbgs() << "Trying to expand node\n"); +  LLVM_DEBUG(dbgs() << "Trying to expand node\n");    SmallVector<SDValue, 8> Results;    SDLoc dl(Node);    SDValue Tmp1, Tmp2, Tmp3, Tmp4; @@ -3311,7 +3288,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {      }      break;    case ISD::FP_TO_FP16: -    DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); +    LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");      if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {        SDValue Op = Node->getOperand(0);        MVT SVT = Op.getSimpleValueType(); @@ -3525,15 +3502,25 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {    case ISD::USUBO: {      SDValue LHS = Node->getOperand(0);      SDValue RHS = Node->getOperand(1); -    SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ? -                              ISD::ADD : ISD::SUB, dl, LHS.getValueType(), -                              LHS, RHS); +    bool IsAdd = Node->getOpcode() == ISD::UADDO; +    // If ADD/SUBCARRY is legal, use that instead. +    unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY; +    if (TLI.isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) { +      SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1)); +      SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(), +                                      { LHS, RHS, CarryIn }); +      Results.push_back(SDValue(NodeCarry.getNode(), 0)); +      Results.push_back(SDValue(NodeCarry.getNode(), 1)); +      break; +    } + +    SDValue Sum = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl, +                              LHS.getValueType(), LHS, RHS);      Results.push_back(Sum);      EVT ResultType = Node->getValueType(1);      EVT SetCCType = getSetCCResultType(Node->getValueType(0)); -    ISD::CondCode CC -      = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; +    ISD::CondCode CC = IsAdd ? 
ISD::SETULT : ISD::SETUGT;      SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);      Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType)); @@ -3684,8 +3671,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {      unsigned EntrySize =        DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); -    Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, -                        DAG.getConstant(EntrySize, dl, Index.getValueType())); +    // For power-of-two jumptable entry sizes convert multiplication to a shift. +    // This transformation needs to be done here since otherwise the MIPS +    // backend will end up emitting a three instruction multiply sequence +    // instead of a single shift and MSP430 will call a runtime function. +    if (llvm::isPowerOf2_32(EntrySize)) +      Index = DAG.getNode( +          ISD::SHL, dl, Index.getValueType(), Index, +          DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType())); +    else +      Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, +                          DAG.getConstant(EntrySize, dl, Index.getValueType()));      SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),                                 Index, Table); @@ -3701,7 +3697,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {        Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,                            TLI.getPICJumpTableRelocBase(Table, DAG));      } -    Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr); + +    Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, DAG);      Results.push_back(Tmp1);      break;    } @@ -3720,7 +3717,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {        if (Tmp2.isUndef() ||            (Tmp2.getOpcode() == ISD::AND &&             isa<ConstantSDNode>(Tmp2.getOperand(1)) && -           dyn_cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1)) +           cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1))          Tmp3 = Tmp2;        else          Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, @@ -3759,7 +3756,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {      // illegal; expand it into a SELECT_CC.      EVT VT = Node->getValueType(0);      int TrueValue; -    switch (TLI.getBooleanContents(Tmp1->getValueType(0))) { +    switch (TLI.getBooleanContents(Tmp1.getValueType())) {      case TargetLowering::ZeroOrOneBooleanContent:      case TargetLowering::UndefinedBooleanContent:        TrueValue = 1; @@ -3784,7 +3781,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {      SDValue CC = Node->getOperand(4);      ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get(); -    if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) { +    if (TLI.isCondCodeLegalOrCustom(CCOp, Tmp1.getSimpleValueType())) {        // If the condition code is legal, then we need to expand this        // node using SETCC and SELECT.        EVT CmpVT = Tmp1.getValueType(); @@ -3805,7 +3802,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {      // version (or vice versa).      
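A little above, the UADDO/USUBO fallback expansion reduces to a plain wrapping add or sub plus one unsigned compare against the original LHS; the compare direction (SETULT for add, SETUGT for sub) is the whole trick. Shown standalone, with wrapping uint32_t arithmetic modeling the DAG's modular operations:

```cpp
#include <cstdint>
#include <cstdio>

// UADDO: the wrapped sum is smaller than an operand iff the add overflowed.
static uint32_t uaddo(uint32_t A, uint32_t B, bool &Ovf) {
  uint32_t Sum = A + B; // wrapping add
  Ovf = Sum < A;        // ISD::SETULT
  return Sum;
}
// USUBO: the wrapped difference exceeds the LHS iff the sub borrowed.
static uint32_t usubo(uint32_t A, uint32_t B, bool &Ovf) {
  uint32_t Diff = A - B; // wrapping sub
  Ovf = Diff > A;        // ISD::SETUGT
  return Diff;
}

int main() {
  bool O1, O2;
  uaddo(0xFFFFFFFFu, 1, O1);
  usubo(0, 1, O2);
  std::printf("add ovf=%d sub ovf=%d\n", O1, O2); // 1 1
}
```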
ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp,                                                 Tmp1.getValueType().isInteger()); -    if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { +    if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) {        // Use the new condition code and swap true and false        Legalized = true;        Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC); @@ -3813,7 +3810,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {        // If The inverse is not legal, then try to swap the arguments using        // the inverse condition code.        ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC); -      if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) { +      if (TLI.isCondCodeLegalOrCustom(SwapInvCC, Tmp1.getSimpleValueType())) {          // The swapped inverse condition is legal, so swap true and false,          // lhs and rhs.          Legalized = true; @@ -3906,6 +3903,46 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {      ReplaceNode(SDValue(Node, 0), Result);      break;    } +  case ISD::ROTL: +  case ISD::ROTR: { +    bool IsLeft = Node->getOpcode() == ISD::ROTL; +    SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1); +    EVT ResVT = Node->getValueType(0); +    EVT OpVT = Op0.getValueType(); +    assert(OpVT == ResVT && +           "The result and the operand types of rotate should match"); +    EVT ShVT = Op1.getValueType(); +    SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT); + +    // If a rotate in the other direction is legal, use it. +    unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL; +    if (TLI.isOperationLegal(RevRot, ResVT)) { +      SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1); +      Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub)); +      break; +    } + +    // Otherwise, +    //   (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1))) +    //   (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1))) +    // +    assert(isPowerOf2_32(OpVT.getScalarSizeInBits()) && +           "Expecting the type bitwidth to be a power of 2"); +    unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; +    unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL; +    SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT, +                                 Width, DAG.getConstant(1, dl, ShVT)); +    SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1); +    SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1); +    SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1); + +    SDValue Or = DAG.getNode(ISD::OR, dl, ResVT, +                             DAG.getNode(ShOpc, dl, ResVT, Op0, And0), +                             DAG.getNode(HsOpc, dl, ResVT, Op0, And1)); +    Results.push_back(Or); +    break; +  } +    case ISD::GLOBAL_OFFSET_TABLE:    case ISD::GlobalAddress:    case ISD::GlobalTLSAddress: @@ -3921,19 +3958,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {    // Replace the original node with the legalized result.    
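The new ROTL/ROTR expansion just above emits two shifts and an OR, masking both shift amounts with w-1; the power-of-two width assertion is what makes the (w - c) & (w - 1) masking behave as a modulo. The same formula as freestanding C++, well defined even for a rotate count of zero:

```cpp
#include <cstdint>
#include <cstdio>

// (rotl x, c) -> (x << (c & (w-1))) | (x >> ((w-c) & (w-1)))
// Masking both shift amounts keeps them in range, including c == 0, where
// an unmasked w-bit shift would be undefined behavior in C++.
static uint32_t rotl32(uint32_t X, unsigned C) {
  return (X << (C & 31)) | (X >> ((32 - C) & 31));
}
static uint32_t rotr32(uint32_t X, unsigned C) {
  return (X >> (C & 31)) | (X << ((32 - C) & 31));
}

int main() {
  std::printf("%08x %08x %08x\n", rotl32(0x80000001u, 1),
              rotr32(0x80000001u, 1), rotl32(0x12345678u, 0));
  // 00000003 c0000000 12345678
}
```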
if (Results.empty()) { -    DEBUG(dbgs() << "Cannot expand node\n"); +    LLVM_DEBUG(dbgs() << "Cannot expand node\n");      return false;    } -  DEBUG(dbgs() << "Succesfully expanded node\n"); +  LLVM_DEBUG(dbgs() << "Successfully expanded node\n");    ReplaceNode(Node, Results.data());    return true;  }  void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { -  DEBUG(dbgs() << "Trying to convert node to libcall\n"); +  LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");    SmallVector<SDValue, 8> Results;    SDLoc dl(Node); +  // FIXME: Check flags on the node to see if we can use a finite call. +  bool CanUseFiniteLibCall = TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath;    unsigned Opc = Node->getOpcode();    switch (Opc) {    case ISD::ATOMIC_FENCE: { @@ -3962,6 +4001,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {    case ISD::ATOMIC_LOAD_ADD:    case ISD::ATOMIC_LOAD_SUB:    case ISD::ATOMIC_LOAD_AND: +  case ISD::ATOMIC_LOAD_CLR:    case ISD::ATOMIC_LOAD_OR:    case ISD::ATOMIC_LOAD_XOR:    case ISD::ATOMIC_LOAD_NAND: @@ -4028,33 +4068,68 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {      break;    case ISD::FLOG:    case ISD::STRICT_FLOG: -    Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, -                                      RTLIB::LOG_F80, RTLIB::LOG_F128, -                                      RTLIB::LOG_PPCF128)); +    if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite)) +      Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, +                                        RTLIB::LOG_FINITE_F64, +                                        RTLIB::LOG_FINITE_F80, +                                        RTLIB::LOG_FINITE_F128, +                                        RTLIB::LOG_FINITE_PPCF128)); +    else +      Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, +                                        RTLIB::LOG_F80, RTLIB::LOG_F128, +                                        RTLIB::LOG_PPCF128));      break;    case ISD::FLOG2:    case ISD::STRICT_FLOG2: -    Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, -                                      RTLIB::LOG2_F80, RTLIB::LOG2_F128, -                                      RTLIB::LOG2_PPCF128)); +    if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite)) +      Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, +                                        RTLIB::LOG2_FINITE_F64, +                                        RTLIB::LOG2_FINITE_F80, +                                        RTLIB::LOG2_FINITE_F128, +                                        RTLIB::LOG2_FINITE_PPCF128)); +    else +      Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, +                                        RTLIB::LOG2_F80, RTLIB::LOG2_F128, +                                        RTLIB::LOG2_PPCF128));      break;    case ISD::FLOG10:    case ISD::STRICT_FLOG10: -    Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, -                                      RTLIB::LOG10_F80, RTLIB::LOG10_F128, -                                      RTLIB::LOG10_PPCF128)); +    if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite)) +      Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, +                                        RTLIB::LOG10_FINITE_F64, +                                        RTLIB::LOG10_FINITE_F80, + 
                                       RTLIB::LOG10_FINITE_F128, +                                        RTLIB::LOG10_FINITE_PPCF128)); +    else +      Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, +                                        RTLIB::LOG10_F80, RTLIB::LOG10_F128, +                                        RTLIB::LOG10_PPCF128));      break;    case ISD::FEXP:    case ISD::STRICT_FEXP: -    Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, -                                      RTLIB::EXP_F80, RTLIB::EXP_F128, -                                      RTLIB::EXP_PPCF128)); +    if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite)) +      Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, +                                        RTLIB::EXP_FINITE_F64, +                                        RTLIB::EXP_FINITE_F80, +                                        RTLIB::EXP_FINITE_F128, +                                        RTLIB::EXP_FINITE_PPCF128)); +    else +      Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, +                                        RTLIB::EXP_F80, RTLIB::EXP_F128, +                                        RTLIB::EXP_PPCF128));      break;    case ISD::FEXP2:    case ISD::STRICT_FEXP2: -    Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, -                                      RTLIB::EXP2_F80, RTLIB::EXP2_F128, -                                      RTLIB::EXP2_PPCF128)); +    if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite)) +      Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, +                                        RTLIB::EXP2_FINITE_F64, +                                        RTLIB::EXP2_FINITE_F80, +                                        RTLIB::EXP2_FINITE_F128, +                                        RTLIB::EXP2_FINITE_PPCF128)); +    else +      Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, +                                        RTLIB::EXP2_F80, RTLIB::EXP2_F128, +                                        RTLIB::EXP2_PPCF128));      break;    case ISD::FTRUNC:      Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, @@ -4100,9 +4175,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {      break;    case ISD::FPOW:    case ISD::STRICT_FPOW: -    Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, -                                      RTLIB::POW_F80, RTLIB::POW_F128, -                                      RTLIB::POW_PPCF128)); +    if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite)) +      Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, +                                        RTLIB::POW_FINITE_F64, +                                        RTLIB::POW_FINITE_F80, +                                        RTLIB::POW_FINITE_F128, +                                        RTLIB::POW_FINITE_PPCF128)); +    else +      Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, +                                        RTLIB::POW_F80, RTLIB::POW_F128, +                                        RTLIB::POW_PPCF128));      break;    case ISD::FDIV:      Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, @@ -4186,10 +4268,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {    // Replace the original node with the legalized result.    
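Every FLOG/FEXP (and, below, FPOW) case above repeats the same guard, so the decision reduces to one small predicate. A hedged distillation, where the struct and parameter names are illustrative and FnHasFiniteVariant models the real TargetLibraryInfo::has(LibFunc_exp_finite) style queries:

// Hypothetical distillation of the finite-libcall selection repeated above.
struct FPMathOptions {
  bool NoInfsFPMath; // models TM.Options.NoInfsFPMath
  bool NoNaNsFPMath; // models TM.Options.NoNaNsFPMath
};

bool shouldUseFiniteLibcall(const FPMathOptions &Opts,
                            bool FnHasFiniteVariant) {
  // __<fn>_finite may assume no NaN/Inf inputs or results, so it is only
  // safe when both fast-math guarantees hold and libm provides the symbol.
  bool CanUseFiniteLibCall = Opts.NoInfsFPMath && Opts.NoNaNsFPMath;
  return CanUseFiniteLibCall && FnHasFiniteVariant;
}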
if (!Results.empty()) { -    DEBUG(dbgs() << "Successfully converted node to libcall\n"); +    LLVM_DEBUG(dbgs() << "Successfully converted node to libcall\n");      ReplaceNode(Node, Results.data());    } else -    DEBUG(dbgs() << "Could not convert node to libcall\n"); +    LLVM_DEBUG(dbgs() << "Could not convert node to libcall\n");  }  // Determine the vector type to use in place of an original scalar element when @@ -4203,7 +4285,7 @@ static MVT getPromotedVectorElementType(const TargetLowering &TLI,  }  void SelectionDAGLegalize::PromoteNode(SDNode *Node) { -  DEBUG(dbgs() << "Trying to promote node\n"); +  LLVM_DEBUG(dbgs() << "Trying to promote node\n");    SmallVector<SDValue, 8> Results;    MVT OVT = Node->getSimpleValueType(0);    if (Node->getOpcode() == ISD::UINT_TO_FP || @@ -4256,7 +4338,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {          ISD::SRL, dl, NVT, Tmp1,          DAG.getConstant(DiffBits, dl,                          TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); -    Results.push_back(Tmp1); + +    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));      break;    }    case ISD::FP_TO_UINT: @@ -4640,10 +4723,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {    // Replace the original node with the legalized result.    if (!Results.empty()) { -    DEBUG(dbgs() << "Successfully promoted node\n"); +    LLVM_DEBUG(dbgs() << "Successfully promoted node\n");      ReplaceNode(Node, Results.data());    } else -    DEBUG(dbgs() << "Could not promote node\n"); +    LLVM_DEBUG(dbgs() << "Could not promote node\n");  }  /// This is the entry point for the file. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e28a3aa47ca3..b0ae1e0399fb 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -47,8 +47,8 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,  //===----------------------------------------------------------------------===//  bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { -  DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); -        dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); +             dbgs() << "\n");    SDValue R = SDValue();    switch (N->getOpcode()) { @@ -738,8 +738,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {  //===----------------------------------------------------------------------===//  bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { -  DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); -        dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); +             dbgs() << "\n");    SDValue Res = SDValue();    switch (N->getOpcode()) { @@ -1039,7 +1039,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {  /// have invalid operands or may have other results that need promotion, we just  /// know that (at least) one result needs expansion.  
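Stepping back to the PromoteNode hunk above (the ISD::SRL by DiffBits followed by the newly added ISD::TRUNCATE, i.e. the byte-swap promotion path): swapping a narrow value in a wider register leaves the result in the high bits, so it must be shifted down and, with this change, truncated back to the original type. A standalone illustration using the GCC/Clang builtin:

#include <cstdint>

// bswap of an i16 promoted to i32: swap in the wide type, shift the result
// down by DiffBits (32 - 16), then truncate back to i16.
uint16_t bswap16_promoted(uint16_t X) {
  uint32_t Wide = __builtin_bswap32(static_cast<uint32_t>(X));
  Wide >>= 16;                        // ISD::SRL by DiffBits
  return static_cast<uint16_t>(Wide); // the added ISD::TRUNCATE
}

For X = 0x1234 this computes 0x00001234 -> 0x34120000 -> 0x3412, which is exactly bswap16(0x1234).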
void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { -  DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");    SDValue Lo, Hi;    Lo = Hi = SDValue(); @@ -1538,7 +1538,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,  /// types of the node are known to be legal, but other operands of the node may  /// need promotion or expansion as well as the specified one.  bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { -  DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");    SDValue Res = SDValue();    // See if the target wants to custom expand this node. @@ -1658,18 +1658,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {    EVT RVT = N->getValueType(0);    SDLoc dl(N); -  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on -  // PPC (the libcall is not available).  FIXME: Do this in a less hacky way. -  if (RVT == MVT::i32) { -    assert(N->getOperand(0).getValueType() == MVT::ppcf128 && -           "Logic only correct for ppcf128!"); -    SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128, -                              N->getOperand(0), DAG.getValueType(MVT::f64)); -    Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res, -                      DAG.getIntPtrConstant(1, dl)); -    return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); -  } -    RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");    return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; @@ -1679,31 +1667,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {    EVT RVT = N->getValueType(0);    SDLoc dl(N); -  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on -  // PPC (the libcall is not available).  FIXME: Do this in a less hacky way. -  if (RVT == MVT::i32) { -    assert(N->getOperand(0).getValueType() == MVT::ppcf128 && -           "Logic only correct for ppcf128!"); -    const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; -    APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); -    SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); -    //  X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X -    // FIXME: generated code sucks. -    // TODO: Are there fast-math-flags to propagate to this FSUB? 
-    return DAG.getSelectCC(dl, N->getOperand(0), Tmp, -                           DAG.getNode(ISD::ADD, dl, MVT::i32, -                                       DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, -                                                   DAG.getNode(ISD::FSUB, dl, -                                                               MVT::ppcf128, -                                                               N->getOperand(0), -                                                               Tmp)), -                                       DAG.getConstant(0x80000000, dl, -                                                       MVT::i32)), -                           DAG.getNode(ISD::FP_TO_SINT, dl, -                                       MVT::i32, N->getOperand(0)), -                           ISD::SETGE); -  } -    RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");    return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), @@ -2139,13 +2102,12 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {    // Load the value as an integer value with the same number of bits.    EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); -  auto MMOFlags = -      L->getMemOperand()->getFlags() & -      ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);    SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT,                               SDLoc(N), L->getChain(), L->getBasePtr(),                               L->getOffset(), L->getPointerInfo(), IVT, -                             L->getAlignment(), MMOFlags, L->getAAInfo()); +                             L->getAlignment(), +                             L->getMemOperand()->getFlags(), +                             L->getAAInfo());    // Legalize the chain result by replacing uses of the old value chain with the    // new one    ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 29f0bb475b08..63a1ea13a5f5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -36,12 +36,13 @@ using namespace llvm;  /// may also have invalid operands or may have other results that need  /// expansion, we just know that (at least) one result needs promotion.  void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { -  DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); +             dbgs() << "\n");    SDValue Res = SDValue();    // See if the target wants to custom expand this node.    
if (CustomLowerNode(N, N->getValueType(ResNo), true)) { -    DEBUG(dbgs() << "Node has been custom expanded, done\n"); +    LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");      return;    } @@ -146,6 +147,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {    case ISD::ATOMIC_LOAD_ADD:    case ISD::ATOMIC_LOAD_SUB:    case ISD::ATOMIC_LOAD_AND: +  case ISD::ATOMIC_LOAD_CLR:    case ISD::ATOMIC_LOAD_OR:    case ISD::ATOMIC_LOAD_XOR:    case ISD::ATOMIC_LOAD_NAND: @@ -501,7 +503,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {    SDLoc dl(N);    SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(), -                   N->getIndex()}; +                   N->getIndex(), N->getScale()};    SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),                                      N->getMemoryVT(), dl, Ops,                                      N->getMemOperand()); @@ -586,43 +588,39 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {  }  SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { -  EVT SVT = getSetCCResultType(N->getOperand(0).getValueType()); - +  EVT InVT = N->getOperand(0).getValueType();    EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); -  // Only use the result of getSetCCResultType if it is legal, -  // otherwise just use the promoted result type (NVT). -  if (!TLI.isTypeLegal(SVT)) -    SVT = NVT; +  EVT SVT = getSetCCResultType(InVT); + +  // If we got back a type that needs to be promoted, this likely means +  // the input type also needs to be promoted. So get the promoted type for +  // the input and try the query again. +  if (getTypeAction(SVT) == TargetLowering::TypePromoteInteger) { +    if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) { +      InVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); +      SVT = getSetCCResultType(InVT); +    } else { +      // Input type isn't promoted, just use the default promoted type. +      SVT = NVT; +    } +  }    SDLoc dl(N);    assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&           "Vector compare must return a vector result!"); -  SDValue LHS = N->getOperand(0); -  SDValue RHS = N->getOperand(1); -  if (LHS.getValueType() != RHS.getValueType()) { -    if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger && -        !LHS.getValueType().isVector()) -      LHS = GetPromotedInteger(LHS); -    if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger && -        !RHS.getValueType().isVector()) -      RHS = GetPromotedInteger(RHS); -  } -    // Get the SETCC result using the canonical SETCC type. -  SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS, -                              N->getOperand(2)); +  SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), +                              N->getOperand(1), N->getOperand(2));    // Convert to the expected type.    
return DAG.getSExtOrTrunc(SetCC, dl, NVT);  }  SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { -  SDValue LHS = N->getOperand(0); +  SDValue LHS = GetPromotedInteger(N->getOperand(0));    SDValue RHS = N->getOperand(1); -  if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) -    LHS = GetPromotedInteger(LHS);    if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)      RHS = ZExtPromotedInteger(RHS);    return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS); @@ -661,22 +659,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {  }  SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { -  SDValue LHS = N->getOperand(0); -  SDValue RHS = N->getOperand(1);    // The input value must be properly sign extended. -  if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) -    LHS = SExtPromotedInteger(LHS); +  SDValue LHS = SExtPromotedInteger(N->getOperand(0)); +  SDValue RHS = N->getOperand(1);    if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)      RHS = ZExtPromotedInteger(RHS);    return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);  }  SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { -  SDValue LHS = N->getOperand(0); -  SDValue RHS = N->getOperand(1);    // The input value must be properly zero extended. -  if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) -    LHS = ZExtPromotedInteger(LHS); +  SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); +  SDValue RHS = N->getOperand(1);    if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)      RHS = ZExtPromotedInteger(RHS);    return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS); @@ -904,11 +898,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {  /// result types of the node are known to be legal, but other operands of the  /// node may need promotion or expansion as well as the specified one.  bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { -  DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); +             dbgs() << "\n");    SDValue Res = SDValue();    if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { -    DEBUG(dbgs() << "Node has been custom lowered, done\n"); +    LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n");      return false;    } @@ -1001,11 +996,11 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,      // than the width of NewLHS/NewRHS, we can avoid inserting a real truncate      // instruction, which is eventually redundant.      
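The SRA/SRL promotions above hinge on correctly extended inputs: an arithmetic right shift only behaves in the wider type if the value was sign-extended into it, and a logical right shift only if it was zero-extended. A minimal illustration (assumes Amt is smaller than the original 8-bit width so the narrow and wide shifts agree):

#include <cstdint>

int8_t sra_i8_via_i32(int8_t X, unsigned Amt) {
  int32_t Wide = X;                         // SExtPromotedInteger
  return static_cast<int8_t>(Wide >> Amt);  // SRA in i32, then truncate
}

uint8_t srl_u8_via_i32(uint8_t X, unsigned Amt) {
  uint32_t Wide = X;                        // ZExtPromotedInteger
  return static_cast<uint8_t>(Wide >> Amt); // SRL in i32, then truncate
}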
unsigned OpLEffectiveBits = -        OpL.getValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; +        OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;      unsigned OpREffectiveBits = -        OpR.getValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; -    if (OpLEffectiveBits <= NewLHS.getValueSizeInBits() && -        OpREffectiveBits <= NewRHS.getValueSizeInBits()) { +        OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; +    if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() && +        OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {        NewLHS = OpL;        NewRHS = OpR;      } else { @@ -1356,7 +1351,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {  /// have invalid operands or may have other results that need promotion, we just  /// know that (at least) one result needs expansion.  void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { -  DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); +             dbgs() << "\n");    SDValue Lo, Hi;    Lo = Hi = SDValue(); @@ -1413,6 +1409,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {    case ISD::ATOMIC_LOAD_ADD:    case ISD::ATOMIC_LOAD_SUB:    case ISD::ATOMIC_LOAD_AND: +  case ISD::ATOMIC_LOAD_CLR:    case ISD::ATOMIC_LOAD_OR:    case ISD::ATOMIC_LOAD_XOR:    case ISD::ATOMIC_LOAD_NAND: @@ -2893,7 +2890,8 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,  /// result types of the node are known to be legal, but other operands of the  /// node may need promotion or expansion as well as the specified one.  bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { -  DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); +             dbgs() << "\n");    SDValue Res = SDValue();    if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) @@ -2915,7 +2913,6 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {    case ISD::SCALAR_TO_VECTOR:  Res = ExpandOp_SCALAR_TO_VECTOR(N); break;    case ISD::SELECT_CC:         Res = ExpandIntOp_SELECT_CC(N); break;    case ISD::SETCC:             Res = ExpandIntOp_SETCC(N); break; -  case ISD::SETCCE:            Res = ExpandIntOp_SETCCE(N); break;    case ISD::SETCCCARRY:        Res = ExpandIntOp_SETCCCARRY(N); break;    case ISD::SINT_TO_FP:        Res = ExpandIntOp_SINT_TO_FP(N); break;    case ISD::STORE:   Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; @@ -3051,15 +3048,14 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,      return;    } -  // Lower with SETCCE or SETCCCARRY if the target supports it. +  // Lower with SETCCCARRY if the target supports it.    EVT HiVT = LHSHi.getValueType();    EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT);    bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT);    // FIXME: Make all targets support this, then remove the other lowering. -  if (HasSETCCCARRY || -      TLI.getOperationAction(ISD::SETCCE, ExpandVT) == TargetLowering::Custom) { -    // SETCCE/SETCCCARRY can detect < and >= directly. For > and <=, flip +  if (HasSETCCCARRY) { +    // SETCCCARRY can detect < and >= directly. For > and <=, flip      // operands and condition code.      
bool FlipOperands = false;      switch (CCCode) { @@ -3074,17 +3070,15 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,        std::swap(LHSHi, RHSHi);      }      // Perform a wide subtraction, feeding the carry from the low part into -    // SETCCE/SETCCCARRY. The SETCCE/SETCCCARRY operation is essentially -    // looking at the high part of the result of LHS - RHS. It is negative -    // iff LHS < RHS. It is zero or positive iff LHS >= RHS. +    // SETCCCARRY. The SETCCCARRY operation is essentially looking at the high +    // part of the result of LHS - RHS. It is negative iff LHS < RHS. It is +    // zero or positive iff LHS >= RHS.      EVT LoVT = LHSLo.getValueType(); -    SDVTList VTList = DAG.getVTList( -        LoVT, HasSETCCCARRY ? getSetCCResultType(LoVT) : MVT::Glue); -    SDValue LowCmp = DAG.getNode(HasSETCCCARRY ? ISD::USUBO : ISD::SUBC, dl, -                                 VTList, LHSLo, RHSLo); -    SDValue Res = DAG.getNode(HasSETCCCARRY ? ISD::SETCCCARRY : ISD::SETCCE, dl, -                              getSetCCResultType(HiVT), LHSHi, RHSHi, -                              LowCmp.getValue(1), DAG.getCondCode(CCCode)); +    SDVTList VTList = DAG.getVTList(LoVT, getSetCCResultType(LoVT)); +    SDValue LowCmp = DAG.getNode(ISD::USUBO, dl, VTList, LHSLo, RHSLo); +    SDValue Res = DAG.getNode(ISD::SETCCCARRY, dl, getSetCCResultType(HiVT), +                              LHSHi, RHSHi, LowCmp.getValue(1), +                              DAG.getCondCode(CCCode));      NewLHS = Res;      NewRHS = SDValue();      return; @@ -3152,24 +3146,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {        DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0);  } -SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { -  SDValue LHS = N->getOperand(0); -  SDValue RHS = N->getOperand(1); -  SDValue Carry = N->getOperand(2); -  SDValue Cond = N->getOperand(3); -  SDLoc dl = SDLoc(N); - -  SDValue LHSLo, LHSHi, RHSLo, RHSHi; -  GetExpandedInteger(LHS, LHSLo, LHSHi); -  GetExpandedInteger(RHS, RHSLo, RHSHi); - -  // Expand to a SUBE for the low part and a smaller SETCCE for the high. -  SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); -  SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry); -  return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi, -                     LowCmp.getValue(1), Cond); -} -  SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) {    SDValue LHS = N->getOperand(0);    SDValue RHS = N->getOperand(1); @@ -3497,21 +3473,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {    assert(NumElem * NumOperands == NumOutElem &&           "Unexpected number of elements"); -  // If the input type is legal and we can promote it to a legal type with the -  // same element size, go ahead do that to create a new concat. -  if (getTypeAction(N->getOperand(0).getValueType()) == -      TargetLowering::TypeLegal) { -    EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem); -    if (TLI.isTypeLegal(InPromotedTy)) { -      SmallVector<SDValue, 8> Ops(NumOperands); -      for (unsigned i = 0; i < NumOperands; ++i) { -        Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy, -                             N->getOperand(i)); -      } -      return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops); -    } -  } -    // Take the elements from the first vector.    
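The wide-compare lowering above is easy to check by hand: the low halves go through a USUBO that produces a borrow, and SETCCCARRY consumes that borrow while comparing the high halves. A model of the unsigned less-than case for a 128-bit compare split into 64-bit parts:

#include <cstdint>

// LHS < RHS iff the full-width subtraction LHS - RHS borrows: the low
// halves produce a borrow (ISD::USUBO), and the high-half step
// (ISD::SETCCCARRY) folds that borrow into its own comparison.
bool ult128(uint64_t LHSLo, uint64_t LHSHi, uint64_t RHSLo, uint64_t RHSHi) {
  bool LowBorrow = LHSLo < RHSLo; // carry out of the low USUBO
  return LHSHi < RHSHi || (LHSHi == RHSHi && LowBorrow);
}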
SmallVector<SDValue, 8> Ops(NumOutElem);    for (unsigned i = 0; i < NumOperands; ++i) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 4438ee7878b8..a9f144c06e9a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -84,9 +84,11 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {        SDValue Res(&Node, i);        EVT VT = Res.getValueType();        bool Failed = false; +      // Don't create a value in map. +      auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0;        unsigned Mapped = 0; -      if (ReplacedValues.find(Res) != ReplacedValues.end()) { +      if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) {          Mapped |= 1;          // Check that remapped values are only used by nodes marked NewNode.          for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); @@ -97,30 +99,32 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {          // Check that the final result of applying ReplacedValues is not          // marked NewNode. -        SDValue NewVal = ReplacedValues[Res]; -        DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal); +        auto NewValId = ReplacedValues[ResId]; +        auto I = ReplacedValues.find(NewValId);          while (I != ReplacedValues.end()) { -          NewVal = I->second; -          I = ReplacedValues.find(NewVal); +          NewValId = I->second; +          I = ReplacedValues.find(NewValId);          } +        SDValue NewVal = getSDValue(NewValId); +        (void)NewVal;          assert(NewVal.getNode()->getNodeId() != NewNode &&                 "ReplacedValues maps to a new node!");        } -      if (PromotedIntegers.find(Res) != PromotedIntegers.end()) +      if (ResId && PromotedIntegers.find(ResId) != PromotedIntegers.end())          Mapped |= 2; -      if (SoftenedFloats.find(Res) != SoftenedFloats.end()) +      if (ResId && SoftenedFloats.find(ResId) != SoftenedFloats.end())          Mapped |= 4; -      if (ScalarizedVectors.find(Res) != ScalarizedVectors.end()) +      if (ResId && ScalarizedVectors.find(ResId) != ScalarizedVectors.end())          Mapped |= 8; -      if (ExpandedIntegers.find(Res) != ExpandedIntegers.end()) +      if (ResId && ExpandedIntegers.find(ResId) != ExpandedIntegers.end())          Mapped |= 16; -      if (ExpandedFloats.find(Res) != ExpandedFloats.end()) +      if (ResId && ExpandedFloats.find(ResId) != ExpandedFloats.end())          Mapped |= 32; -      if (SplitVectors.find(Res) != SplitVectors.end()) +      if (ResId && SplitVectors.find(ResId) != SplitVectors.end())          Mapped |= 64; -      if (WidenedVectors.find(Res) != WidenedVectors.end()) +      if (ResId && WidenedVectors.find(ResId) != WidenedVectors.end())          Mapped |= 128; -      if (PromotedFloats.find(Res) != PromotedFloats.end()) +      if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end())          Mapped |= 256;        if (Node.getNodeId() != Processed) { @@ -224,9 +228,9 @@ bool DAGTypeLegalizer::run() {      assert(N->getNodeId() == ReadyToProcess &&             "Node should be ready if on worklist!"); -    DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG)); +    LLVM_DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG));      if (IgnoreNodeResults(N)) { -      DEBUG(dbgs() << "Ignoring node results\n"); +      LLVM_DEBUG(dbgs() << "Ignoring node results\n");        goto ScanOperands;      
} @@ -234,11 +238,11 @@ bool DAGTypeLegalizer::run() {      // types are illegal.      for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {        EVT ResultVT = N->getValueType(i); -      DEBUG(dbgs() << "Analyzing result type: " << -                      ResultVT.getEVTString() << "\n"); +      LLVM_DEBUG(dbgs() << "Analyzing result type: " << ResultVT.getEVTString() +                        << "\n");        switch (getTypeAction(ResultVT)) {        case TargetLowering::TypeLegal: -        DEBUG(dbgs() << "Legal result type\n"); +        LLVM_DEBUG(dbgs() << "Legal result type\n");          break;        // The following calls must take care of *all* of the node's results,        // not just the illegal result they were passed (this includes results @@ -296,11 +300,11 @@ ScanOperands:          continue;        const auto Op = N->getOperand(i); -      DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG)); +      LLVM_DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG));        EVT OpVT = Op.getValueType();        switch (getTypeAction(OpVT)) {        case TargetLowering::TypeLegal: -        DEBUG(dbgs() << "Legal operand\n"); +        LLVM_DEBUG(dbgs() << "Legal operand\n");          continue;        // The following calls must either replace all of the node's results        // using ReplaceValueWith, and return "false"; or update the node's @@ -370,7 +374,8 @@ ScanOperands:      }      if (i == NumOperands) { -      DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n"); +      LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); +                 dbgs() << "\n");      }      }  NodeDone: @@ -490,9 +495,6 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {    if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)      return N; -  // Remove any stale map entries. -  ExpungeNode(N); -    // Okay, we know that this node is new.  Recursively walk all of its operands    // to see if they are new also.  The depth of this walk is bounded by the size    // of the new tree that was constructed (usually 2-3 nodes), so we don't worry @@ -543,7 +545,6 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {        // to remap the operands, since they are the same as the operands we        // remapped above.        N = M; -      ExpungeNode(N);      }    } @@ -564,100 +565,25 @@ void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {      RemapValue(Val);  } -/// If N has a bogus mapping in ReplacedValues, eliminate it. -/// This can occur when a node is deleted then reallocated as a new node - -/// the mapping in ReplacedValues applies to the deleted node, not the new -/// one. -/// The only map that can have a deleted node as a source is ReplacedValues. -/// Other maps can have deleted nodes as targets, but since their looked-up -/// values are always immediately remapped using RemapValue, resulting in a -/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue -/// always performs correct mappings.  In order to keep the mapping correct, -/// ExpungeNode should be called on any new nodes *before* adding them as -/// either source or target to ReplacedValues (which typically means calling -/// Expunge when a new node is first seen, since it may no longer be marked -/// NewNode by the time it is added to ReplacedValues). -void DAGTypeLegalizer::ExpungeNode(SDNode *N) { -  if (N->getNodeId() != NewNode) -    return; - -  // If N is not remapped by ReplacedValues then there is nothing to do. 
-  unsigned i, e; -  for (i = 0, e = N->getNumValues(); i != e; ++i) -    if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end()) -      break; - -  if (i == e) -    return; - -  // Remove N from all maps - this is expensive but rare. - -  for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(), -       E = PromotedIntegers.end(); I != E; ++I) { -    assert(I->first.getNode() != N); -    RemapValue(I->second); -  } - -  for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(), -       E = SoftenedFloats.end(); I != E; ++I) { -    assert(I->first.getNode() != N); -    RemapValue(I->second); -  } - -  for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(), -       E = ScalarizedVectors.end(); I != E; ++I) { -    assert(I->first.getNode() != N); -    RemapValue(I->second); -  } - -  for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(), -       E = WidenedVectors.end(); I != E; ++I) { -    assert(I->first.getNode() != N); -    RemapValue(I->second); -  } - -  for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator -       I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){ -    assert(I->first.getNode() != N); -    RemapValue(I->second.first); -    RemapValue(I->second.second); -  } - -  for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator -       I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) { -    assert(I->first.getNode() != N); -    RemapValue(I->second.first); -    RemapValue(I->second.second); -  } - -  for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator -       I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) { -    assert(I->first.getNode() != N); -    RemapValue(I->second.first); -    RemapValue(I->second.second); -  } - -  for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(), -       E = ReplacedValues.end(); I != E; ++I) -    RemapValue(I->second); - -  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) -    ReplacedValues.erase(SDValue(N, i)); -} -  /// If the specified value was already legalized to another value,  /// replace it by that value. -void DAGTypeLegalizer::RemapValue(SDValue &N) { -  DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N); +void DAGTypeLegalizer::RemapValue(SDValue &V) { +  auto Id = getTableId(V); +  V = getSDValue(Id); +} + +void DAGTypeLegalizer::RemapId(TableId &Id) { +  auto I = ReplacedValues.find(Id);    if (I != ReplacedValues.end()) { +    assert(Id != I->second && "Id is mapped to itself.");      // Use path compression to speed up future lookups if values get multiply      // replaced with other values. -    RemapValue(I->second); -    N = I->second; +    RemapId(I->second); +    Id = I->second; -    // Note that it is possible to have N.getNode()->getNodeId() == NewNode at -    // this point because it is possible for a node to be put in the map before -    // being processed. +    // Note that, with N = IdToValueMap[Id], it is possible to have +    // N.getNode()->getNodeId() == NewNode at this point because it is possible +    // for a node to be put in the map before being processed.    }  } @@ -714,19 +640,22 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {    assert(From.getNode() != To.getNode() && "Potential legalization loop!");    // If expansion produced new nodes, make sure they are properly marked. -  ExpungeNode(From.getNode()); -  AnalyzeNewValue(To); // Expunges To. 
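The path compression in RemapId above is the classic union-find "find" with flattening: follow the forwarding chain to the final replacement, then point every visited entry directly at it. A self-contained toy model (names illustrative, not the DAGTypeLegalizer API):

#include <cassert>
#include <unordered_map>

using TableId = unsigned;

struct ReplacedValuesModel {
  std::unordered_map<TableId, TableId> Forwarding; // models ReplacedValues

  void remapId(TableId &Id) {
    auto I = Forwarding.find(Id);
    if (I == Forwarding.end())
      return;                      // Id is already final
    assert(Id != I->second && "Id is mapped to itself.");
    remapId(I->second);            // walk to the final replacement...
    Id = I->second;                // ...and compress the path behind us
  }
};

Because ids are never reused, a deleted-then-reallocated node can never alias a stale table entry, which is what made the old ExpungeNode sweep unnecessary.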
+  AnalyzeNewValue(To);    // Anything that used the old node should now use the new one.  Note that this    // can potentially cause recursive merging.    SmallSetVector<SDNode*, 16> NodesToAnalyze;    NodeUpdateListener NUL(*this, NodesToAnalyze);    do { -    DAG.ReplaceAllUsesOfValueWith(From, To); -    // The old node may still be present in a map like ExpandedIntegers or -    // PromotedIntegers.  Inform maps about the replacement. -    ReplacedValues[From] = To; +    // The old node may be present in a map like ExpandedIntegers or +    // PromotedIntegers. Inform maps about the replacement. +    auto FromId = getTableId(From); +    auto ToId = getTableId(To); + +    if (FromId != ToId) +      ReplacedValues[FromId] = ToId; +    DAG.ReplaceAllUsesOfValueWith(From, To);      // Process the list of nodes that need to be reanalyzed.      while (!NodesToAnalyze.empty()) { @@ -751,12 +680,15 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {            SDValue NewVal(M, i);            if (M->getNodeId() == Processed)              RemapValue(NewVal); -          DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);            // OldVal may be a target of the ReplacedValues map which was marked            // NewNode to force reanalysis because it was updated.  Ensure that            // anything that ReplacedValues mapped to OldVal will now be mapped            // all the way to NewVal. -          ReplacedValues[OldVal] = NewVal; +          auto OldValId = getTableId(OldVal); +          auto NewValId = getTableId(NewVal); +          DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal); +          if (OldValId != NewValId) +            ReplacedValues[OldValId] = NewValId;          }          // The original node continues to exist in the DAG, marked NewNode.        } @@ -773,9 +705,11 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {           "Invalid type for promoted integer");    AnalyzeNewValue(Result); -  SDValue &OpEntry = PromotedIntegers[Op]; -  assert(!OpEntry.getNode() && "Node is already promoted!"); -  OpEntry = Result; +  auto &OpIdEntry = PromotedIntegers[getTableId(Op)]; +  assert((OpIdEntry == 0) && "Node is already promoted!"); +  OpIdEntry = getTableId(Result); + +  DAG.transferDbgValues(Op, Result);  }  void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { @@ -788,15 +722,15 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {           "Invalid type for softened float");    AnalyzeNewValue(Result); -  SDValue &OpEntry = SoftenedFloats[Op]; +  auto &OpIdEntry = SoftenedFloats[getTableId(Op)];    // Allow repeated calls to save f128 type nodes    // or any node with type that transforms to itself.    // Many operations on these types are not softened. 
-  assert((!OpEntry.getNode()|| +  assert(((OpIdEntry == 0) ||            Op.getValueType() == -          TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && +              TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&           "Node is already converted to integer!"); -  OpEntry = Result; +  OpIdEntry = getTableId(Result);  }  void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) { @@ -805,9 +739,9 @@ void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {           "Invalid type for promoted float");    AnalyzeNewValue(Result); -  SDValue &OpEntry = PromotedFloats[Op]; -  assert(!OpEntry.getNode() && "Node is already promoted!"); -  OpEntry = Result; +  auto &OpIdEntry = PromotedFloats[getTableId(Op)]; +  assert((OpIdEntry == 0) && "Node is already promoted!"); +  OpIdEntry = getTableId(Result);  }  void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { @@ -818,19 +752,17 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {           "Invalid type for scalarized vector");    AnalyzeNewValue(Result); -  SDValue &OpEntry = ScalarizedVectors[Op]; -  assert(!OpEntry.getNode() && "Node is already scalarized!"); -  OpEntry = Result; +  auto &OpIdEntry = ScalarizedVectors[getTableId(Op)]; +  assert((OpIdEntry == 0) && "Node is already scalarized!"); +  OpIdEntry = getTableId(Result);  }  void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,                                            SDValue &Hi) { -  std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; -  RemapValue(Entry.first); -  RemapValue(Entry.second); -  assert(Entry.first.getNode() && "Operand isn't expanded"); -  Lo = Entry.first; -  Hi = Entry.second; +  std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)]; +  assert((Entry.first != 0) && "Operand isn't expanded"); +  Lo = getSDValue(Entry.first); +  Hi = getSDValue(Entry.second);  }  void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, @@ -856,20 +788,18 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,    }    // Remember that this is the result of the node. -  std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; -  assert(!Entry.first.getNode() && "Node already expanded"); -  Entry.first = Lo; -  Entry.second = Hi; +  std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)]; +  assert((Entry.first == 0) && "Node already expanded"); +  Entry.first = getTableId(Lo); +  Entry.second = getTableId(Hi);  }  void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,                                          SDValue &Hi) { -  std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; -  RemapValue(Entry.first); -  RemapValue(Entry.second); -  assert(Entry.first.getNode() && "Operand isn't expanded"); -  Lo = Entry.first; -  Hi = Entry.second; +  std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)]; +  assert((Entry.first != 0) && "Operand isn't expanded"); +  Lo = getSDValue(Entry.first); +  Hi = getSDValue(Entry.second);  }  void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, @@ -882,21 +812,19 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,    AnalyzeNewValue(Lo);    AnalyzeNewValue(Hi); -  // Remember that this is the result of the node. 
-  std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; -  assert(!Entry.first.getNode() && "Node already expanded"); -  Entry.first = Lo; -  Entry.second = Hi; +  std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)]; +  assert((Entry.first == 0) && "Node already expanded"); +  Entry.first = getTableId(Lo); +  Entry.second = getTableId(Hi);  }  void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,                                        SDValue &Hi) { -  std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; -  RemapValue(Entry.first); -  RemapValue(Entry.second); -  assert(Entry.first.getNode() && "Operand isn't split"); -  Lo = Entry.first; -  Hi = Entry.second; +  std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)]; +  Lo = getSDValue(Entry.first); +  Hi = getSDValue(Entry.second); +  assert(Lo.getNode() && "Operand isn't split");  }  void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, @@ -912,10 +840,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,    AnalyzeNewValue(Hi);    // Remember that this is the result of the node. -  std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; -  assert(!Entry.first.getNode() && "Node already split"); -  Entry.first = Lo; -  Entry.second = Hi; +  std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)]; +  assert((Entry.first == 0) && "Node already split"); +  Entry.first = getTableId(Lo); +  Entry.second = getTableId(Hi);  }  void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { @@ -924,9 +852,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {           "Invalid type for widened vector");    AnalyzeNewValue(Result); -  SDValue &OpEntry = WidenedVectors[Op]; -  assert(!OpEntry.getNode() && "Node already widened!"); -  OpEntry = Result; +  auto &OpIdEntry = WidenedVectors[getTableId(Op)]; +  assert((OpIdEntry == 0) && "Node already widened!"); +  OpIdEntry = getTableId(Result);  } @@ -1064,11 +992,11 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {    EVT NVT = EVT::getIntegerVT(*DAG.getContext(),                                LVT.getSizeInBits() + HVT.getSizeInBits()); +  EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout(), false);    Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);    Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);    Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, -                   DAG.getConstant(LVT.getSizeInBits(), dlHi, -                                   TLI.getPointerTy(DAG.getDataLayout()))); +                   DAG.getConstant(LVT.getSizeInBits(), dlHi, ShiftAmtVT));    return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);  } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 64cb80e0d853..2c6b1ee7900f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -93,46 +93,81 @@ private:             N->getOpcode() == ISD::Register;    } +  // Bijection from SDValue to unique id. As each created node gets a +  // new id we do not need to worry about reuse expunging.  Should we +  // run out of ids, we can do a one time expensive compactification. +  typedef unsigned TableId; + +  TableId NextValueId = 1; + +  SmallDenseMap<SDValue, TableId, 8> ValueToIdMap; +  SmallDenseMap<TableId, SDValue, 8> IdToValueMap; +    /// For integer nodes that are below legal width, this map indicates what    /// promoted value to use. 
-  SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers; +  SmallDenseMap<TableId, TableId, 8> PromotedIntegers;    /// For integer nodes that need to be expanded this map indicates which    /// operands are the expanded version of the input. -  SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers; +  SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedIntegers;    /// For floating-point nodes converted to integers of the same size, this map    /// indicates the converted value to use. -  SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats; +  SmallDenseMap<TableId, TableId, 8> SoftenedFloats;    /// For floating-point nodes that have a smaller precision than the smallest    /// supported precision, this map indicates what promoted value to use. -  SmallDenseMap<SDValue, SDValue, 8> PromotedFloats; +  SmallDenseMap<TableId, TableId, 8> PromotedFloats;    /// For float nodes that need to be expanded this map indicates which operands    /// are the expanded version of the input. -  SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats; +  SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedFloats;    /// For nodes that are <1 x ty>, this map indicates the scalar value of type    /// 'ty' to use. -  SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors; +  SmallDenseMap<TableId, TableId, 8> ScalarizedVectors;    /// For nodes that need to be split this map indicates which operands are the    /// expanded version of the input. -  SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors; +  SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> SplitVectors;    /// For vector nodes that need to be widened, indicates the widened value to    /// use. -  SmallDenseMap<SDValue, SDValue, 8> WidenedVectors; +  SmallDenseMap<TableId, TableId, 8> WidenedVectors;    /// For values that have been replaced with another, indicates the replacement    /// value to use. -  SmallDenseMap<SDValue, SDValue, 8> ReplacedValues; +  SmallDenseMap<TableId, TableId, 8> ReplacedValues;    /// This defines a worklist of nodes to process. In order to be pushed onto    /// this worklist, all operands of a node must have already been processed.    SmallVector<SDNode*, 128> Worklist; +  TableId getTableId(SDValue V) { +    assert(V.getNode() && "Getting TableId on SDValue()"); + +    auto I = ValueToIdMap.find(V); +    if (I != ValueToIdMap.end()) { +      // replace if there's been a shift. +      RemapId(I->second); +      assert(I->second && "All Ids should be nonzero"); +      return I->second; +    } +    // Add if it's not there. +    ValueToIdMap.insert(std::make_pair(V, NextValueId)); +    IdToValueMap.insert(std::make_pair(NextValueId, V)); +    ++NextValueId; +    assert(NextValueId != 0 && +           "Ran out of Ids. 
Increase id type size or add compactification"); +    return NextValueId - 1; +  } + +  const SDValue &getSDValue(TableId &Id) { +    RemapId(Id); +    assert(Id && "TableId should be non-zero"); +    return IdToValueMap[Id]; +  } +  public:    explicit DAGTypeLegalizer(SelectionDAG &dag)      : TLI(dag.getTargetLoweringInfo()), DAG(dag), @@ -147,10 +182,25 @@ public:    bool run();    void NoteDeletion(SDNode *Old, SDNode *New) { -    ExpungeNode(Old); -    ExpungeNode(New); -    for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) -      ReplacedValues[SDValue(Old, i)] = SDValue(New, i); +    for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { +      TableId NewId = getTableId(SDValue(New, i)); +      TableId OldId = getTableId(SDValue(Old, i)); + +      if (OldId != NewId) +        ReplacedValues[OldId] = NewId; + +      // Delete Node from tables. +      ValueToIdMap.erase(SDValue(Old, i)); +      IdToValueMap.erase(OldId); +      PromotedIntegers.erase(OldId); +      ExpandedIntegers.erase(OldId); +      SoftenedFloats.erase(OldId); +      PromotedFloats.erase(OldId); +      ExpandedFloats.erase(OldId); +      ScalarizedVectors.erase(OldId); +      SplitVectors.erase(OldId); +      WidenedVectors.erase(OldId); +    }    }    SelectionDAG &getDAG() const { return DAG; } @@ -158,9 +208,9 @@ public:  private:    SDNode *AnalyzeNewNode(SDNode *N);    void AnalyzeNewValue(SDValue &Val); -  void ExpungeNode(SDNode *N);    void PerformExpensiveChecks(); -  void RemapValue(SDValue &N); +  void RemapId(TableId &Id); +  void RemapValue(SDValue &V);    // Common routines.    SDValue BitConvertToInteger(SDValue Op); @@ -207,8 +257,8 @@ private:    /// returns an i32, the lower 16 bits of which coincide with Op, and the upper    /// 16 bits of which contain rubbish.    SDValue GetPromotedInteger(SDValue Op) { -    SDValue &PromotedOp = PromotedIntegers[Op]; -    RemapValue(PromotedOp); +    TableId &PromotedId = PromotedIntegers[getTableId(Op)]; +    SDValue PromotedOp = getSDValue(PromotedId);      assert(PromotedOp.getNode() && "Operand wasn't promoted?");      return PromotedOp;    } @@ -282,7 +332,7 @@ private:    SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);    // Integer Operand Promotion. -  bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); +  bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);    SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);    SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N);    SDValue PromoteIntOp_BITCAST(SDNode *N); @@ -373,11 +423,10 @@ private:    bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);    // Integer Operand Expansion. -  bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); +  bool ExpandIntegerOperand(SDNode *N, unsigned OpNo);    SDValue ExpandIntOp_BR_CC(SDNode *N);    SDValue ExpandIntOp_SELECT_CC(SDNode *N);    SDValue ExpandIntOp_SETCC(SDNode *N); -  SDValue ExpandIntOp_SETCCE(SDNode *N);    SDValue ExpandIntOp_SETCCCARRY(SDNode *N);    SDValue ExpandIntOp_Shift(SDNode *N);    SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); @@ -403,16 +452,15 @@ private:    /// stay in a register, the Op is not converted to an integer.    /// In that case, the given op is returned.    
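The contract spelled out in the GetPromotedInteger doc comment above (low bits valid, high bits rubbish) is easy to demonstrate concretely; a tiny model of producing and consuming such a value:

#include <cstdint>

// A promoted i16 carries its value in the low 16 bits; the high 16 bits
// are unspecified ("rubbish"), modeled here as an arbitrary payload.
uint32_t makePromoted(uint16_t Val, uint16_t Rubbish) {
  return (static_cast<uint32_t>(Rubbish) << 16) | Val;
}

uint16_t usePromoted(uint32_t Promoted) {
  return static_cast<uint16_t>(Promoted); // truncation discards the rubbish
}

This is why consumers such as PromoteIntRes_SRA/SRL above must re-extend before shifting: the rubbish bits would otherwise leak into the result.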
SDValue GetSoftenedFloat(SDValue Op) { -    auto Iter = SoftenedFloats.find(Op); +    TableId Id = getTableId(Op); +    auto Iter = SoftenedFloats.find(Id);      if (Iter == SoftenedFloats.end()) {        assert(isSimpleLegalType(Op.getValueType()) &&               "Operand wasn't converted to integer?");        return Op;      } - -    SDValue &SoftenedOp = Iter->second; +    SDValue SoftenedOp = getSDValue(Iter->second);      assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?"); -    RemapValue(SoftenedOp);      return SoftenedOp;    }    void SetSoftenedFloat(SDValue Op, SDValue Result); @@ -531,7 +579,7 @@ private:    void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);    // Float Operand Expansion. -  bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); +  bool ExpandFloatOperand(SDNode *N, unsigned OpNo);    SDValue ExpandFloatOp_BR_CC(SDNode *N);    SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N);    SDValue ExpandFloatOp_FP_ROUND(SDNode *N); @@ -549,8 +597,8 @@ private:    //===--------------------------------------------------------------------===//    SDValue GetPromotedFloat(SDValue Op) { -    SDValue &PromotedOp = PromotedFloats[Op]; -    RemapValue(PromotedOp); +    TableId &PromotedId = PromotedFloats[getTableId(Op)]; +    SDValue PromotedOp = getSDValue(PromotedId);      assert(PromotedOp.getNode() && "Operand wasn't promoted?");      return PromotedOp;    } @@ -572,7 +620,7 @@ private:    SDValue PromoteFloatRes_UNDEF(SDNode *N);    SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); -  bool PromoteFloatOperand(SDNode *N, unsigned ResNo); +  bool PromoteFloatOperand(SDNode *N, unsigned OpNo);    SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);    SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);    SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); @@ -589,15 +637,15 @@ private:    /// element type, this returns the element. For example, if Op is a v1i32,    /// Op = < i32 val >, this method returns val, an i32.    SDValue GetScalarizedVector(SDValue Op) { -    SDValue &ScalarizedOp = ScalarizedVectors[Op]; -    RemapValue(ScalarizedOp); +    TableId &ScalarizedId = ScalarizedVectors[getTableId(Op)]; +    SDValue ScalarizedOp = getSDValue(ScalarizedId);      assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");      return ScalarizedOp;    }    void SetScalarizedVector(SDValue Op, SDValue Result);    // Vector Result Scalarization: <1 x ty> -> ty. -  void ScalarizeVectorResult(SDNode *N, unsigned OpNo); +  void ScalarizeVectorResult(SDNode *N, unsigned ResNo);    SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);    SDValue ScalarizeVecRes_BinOp(SDNode *N);    SDValue ScalarizeVecRes_TernaryOp(SDNode *N); @@ -646,13 +694,14 @@ private:    void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);    // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. 
-  void SplitVectorResult(SDNode *N, unsigned OpNo); +  void SplitVectorResult(SDNode *N, unsigned ResNo);    void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi); +  void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -662,9 +711,9 @@ private:    void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); -  void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); -  void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); -  void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi); +  void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); +  void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi); +  void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);    void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);    void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, @@ -684,7 +733,7 @@ private:    SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);    SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);    SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); -  SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); +  SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo);    SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);    SDValue SplitVecOp_VSETCC(SDNode *N);    SDValue SplitVecOp_FP_ROUND(SDNode *N); @@ -701,8 +750,8 @@ private:    /// method returns a v4i32 for which the first two elements are the same as    /// those of Op, while the last two elements contain rubbish.    SDValue GetWidenedVector(SDValue Op) { -    SDValue &WidenedOp = WidenedVectors[Op]; -    RemapValue(WidenedOp); +    TableId &WidenedId = WidenedVectors[getTableId(Op)]; +    SDValue WidenedOp = getSDValue(WidenedId);      assert(WidenedOp.getNode() && "Operand wasn't widened?");      return WidenedOp;    } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 993465ae9dc2..df3134828af5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -300,6 +300,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {    Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);    Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); +  Chain = Hi.getValue(1);    // Handle endianness of the load.    if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout())) @@ -307,7 +308,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {    // Modified the chain - switch anything that used the old chain to use    // the new one. 
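The VAARG fix above hinges on ordering: the Hi load is sequenced after the Lo load, but on big-endian targets Lo and Hi are swapped right afterwards, so the old ReplaceValueWith(..., Hi.getValue(1)) could publish the first load's chain. Capturing the chain before the swap, as the hunk does, avoids that. A toy model of the rule (plain structs, nothing SelectionDAG-specific):

    #include <utility>

    struct Val { int Chain; };

    // Capture the out-chain BEFORE any Lo/Hi swap, as the hunk above does.
    int expandVAArg(bool BigEndian, Val &Lo, Val &Hi) {
      Lo = {1};                 // first load
      Hi = {2};                 // second load, sequenced after the first
      int OutChain = Hi.Chain;  // the chain later users must wait on
      if (BigEndian)
        std::swap(Lo, Hi);      // after this, Hi.Chain would be wrong
      return OutChain;          // what ReplaceValueWith must publish
    }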
-  ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +  ReplaceValueWith(SDValue(N, 1), Chain);  } @@ -384,7 +385,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {    // Build a vector of twice the length out of the expanded elements.    // For example <3 x i64> -> <6 x i32>. -  std::vector<SDValue> NewElts; +  SmallVector<SDValue, 16> NewElts;    NewElts.reserve(NumElts*2);    for (unsigned i = 0; i < NumElts; ++i) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 6a141818bb6d..67928d4bdbd5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -32,7 +32,6 @@  #include "llvm/ADT/SmallVector.h"  #include "llvm/CodeGen/ISDOpcodes.h"  #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/SelectionDAG.h"  #include "llvm/CodeGen/SelectionDAGNodes.h"  #include "llvm/CodeGen/TargetLowering.h" @@ -41,6 +40,7 @@  #include "llvm/Support/Casting.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/MathExtras.h"  #include <cassert>  #include <cstdint> @@ -63,7 +63,7 @@ class VectorLegalizer {    /// legalizing the same thing more than once.    SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; -  /// \brief Adds a node to the translation cache. +  /// Adds a node to the translation cache.    void AddLegalizedOperand(SDValue From, SDValue To) {      LegalizedNodes.insert(std::make_pair(From, To));      // If someone requests legalization of the new node, return itself. @@ -71,55 +71,55 @@ class VectorLegalizer {        LegalizedNodes.insert(std::make_pair(To, To));    } -  /// \brief Legalizes the given node. +  /// Legalizes the given node.    SDValue LegalizeOp(SDValue Op); -  /// \brief Assuming the node is legal, "legalize" the results. +  /// Assuming the node is legal, "legalize" the results.    SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); -  /// \brief Implements unrolling a VSETCC. +  /// Implements unrolling a VSETCC.    SDValue UnrollVSETCC(SDValue Op); -  /// \brief Implement expand-based legalization of vector operations. +  /// Implement expand-based legalization of vector operations.    ///    /// This is just a high-level routine to dispatch to specific code paths for    /// operations to legalize them.    SDValue Expand(SDValue Op); -  /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if +  /// Implements expansion for FNEG; falls back to UnrollVectorOp if    /// FSUB isn't legal.    ///    /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if    /// SINT_TO_FLOAT and SHR on vectors isn't legal.    SDValue ExpandUINT_TO_FLOAT(SDValue Op); -  /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. +  /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.    SDValue ExpandSEXTINREG(SDValue Op); -  /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG. +  /// Implement expansion for ANY_EXTEND_VECTOR_INREG.    ///    /// Shuffles the low lanes of the operand into place and bitcasts to the proper    /// type. The contents of the bits in the extended part of each element are    /// undef.    SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); -  /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG. 
+  /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.    ///    /// Shuffles the low lanes of the operand into place, bitcasts to the proper    /// type, then shifts left and arithmetic shifts right to introduce a sign    /// extension.    SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); -  /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG. +  /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.    ///    /// Shuffles the low lanes of the operand into place and blends zeros into    /// the remaining lanes, finally bitcasting to the proper type.    SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); -  /// \brief Expand bswap of vectors into a shuffle if legal. +  /// Expand bswap of vectors into a shuffle if legal.    SDValue ExpandBSWAP(SDValue Op); -  /// \brief Implement vselect in terms of XOR, AND, OR when blend is not +  /// Implement vselect in terms of XOR, AND, OR when blend is not    /// supported by the target.    SDValue ExpandVSELECT(SDValue Op);    SDValue ExpandSELECT(SDValue Op); @@ -130,19 +130,20 @@ class VectorLegalizer {    SDValue ExpandBITREVERSE(SDValue Op);    SDValue ExpandCTLZ(SDValue Op);    SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op); - -  /// \brief Implements vector promotion. +  SDValue ExpandStrictFPOp(SDValue Op); +   +  /// Implements vector promotion.    ///    /// This is essentially just bitcasting the operands to a different type and    /// bitcasting the result back to the original type.    SDValue Promote(SDValue Op); -  /// \brief Implements [SU]INT_TO_FP vector promotion. +  /// Implements [SU]INT_TO_FP vector promotion.    ///    /// This is a [zs]ext of the input operand to a larger integer type.    SDValue PromoteINT_TO_FP(SDValue Op); -  /// \brief Implements FP_TO_[SU]INT vector promotion of the result type. +  /// Implements FP_TO_[SU]INT vector promotion of the result type.    ///    /// It is promoted to a larger integer type.  The result is then    /// truncated back to the original type. @@ -152,7 +153,7 @@ public:    VectorLegalizer(SelectionDAG& dag) :        DAG(dag), TLI(dag.getTargetLoweringInfo()) {} -  /// \brief Begin legalizer the vector operations in the DAG. +  /// Begin legalizer the vector operations in the DAG.    
+  /// Begin legalizer the vector operations in the DAG.
bool Run();  }; @@ -222,14 +223,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {    for (const SDValue &Op : Node->op_values())      Ops.push_back(LegalizeOp(Op)); -  SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0); +  SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), +                           Op.getResNo());    bool HasVectorValue = false;    if (Op.getOpcode() == ISD::LOAD) {      LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());      ISD::LoadExtType ExtType = LD->getExtensionType();      if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { -      DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG)); +      LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: "; +                 Node->dump(&DAG));        switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),                                     LD->getMemoryVT())) {        default: llvm_unreachable("This action is not supported yet!"); @@ -261,8 +264,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {      EVT StVT = ST->getMemoryVT();      MVT ValVT = ST->getValue().getSimpleValueType();      if (StVT.isVector() && ST->isTruncatingStore()) { -      DEBUG(dbgs() << "\nLegalizing truncating vector store: "; -            Node->dump(&DAG)); +      LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: "; +                 Node->dump(&DAG));        switch (TLI.getTruncStoreAction(ValVT, StVT)) {        default: llvm_unreachable("This action is not supported yet!");        case TargetLowering::Legal: @@ -287,10 +290,34 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {    if (!HasVectorValue)      return TranslateLegalizeResults(Op, Result); -  EVT QueryType; +  TargetLowering::LegalizeAction Action = TargetLowering::Legal;    switch (Op.getOpcode()) {    default:      return TranslateLegalizeResults(Op, Result); +  case ISD::STRICT_FADD: +  case ISD::STRICT_FSUB: +  case ISD::STRICT_FMUL: +  case ISD::STRICT_FDIV: +  case ISD::STRICT_FSQRT: +  case ISD::STRICT_FMA: +  case ISD::STRICT_FPOW: +  case ISD::STRICT_FPOWI: +  case ISD::STRICT_FSIN: +  case ISD::STRICT_FCOS: +  case ISD::STRICT_FEXP: +  case ISD::STRICT_FEXP2: +  case ISD::STRICT_FLOG: +  case ISD::STRICT_FLOG10: +  case ISD::STRICT_FLOG2: +  case ISD::STRICT_FRINT: +  case ISD::STRICT_FNEARBYINT: +    // These pseudo-ops get legalized as if they were their non-strict +    // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT +    // is also legal, but if ISD::FSQRT requires expansion then so does +    // ISD::STRICT_FSQRT. 
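The comment above states the whole policy: a STRICT_* node takes whatever legalize action its plain counterpart has. A condensed standalone sketch of that delegation (toy enums and a toy action table, not LLVM's real ones):

    enum Op { FSQRT, STRICT_FSQRT, FADD, STRICT_FADD };
    enum Action { Legal, Expand };

    static Op baseOpcode(Op O) {
      switch (O) {
      case STRICT_FSQRT: return FSQRT;
      case STRICT_FADD:  return FADD;
      default:           return O;  // already non-strict
      }
    }

    static Action getOperationAction(Op O) {
      return O == FSQRT ? Legal : Expand;  // toy per-opcode table
    }

    // A strict op inherits the action of its non-strict twin.
    static Action getStrictFPOperationAction(Op O) {
      return getOperationAction(baseOpcode(O));
    }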
+    Action = TLI.getStrictFPOperationAction(Node->getOpcode(),  +                                            Node->getValueType(0)); +    break;    case ISD::ADD:    case ISD::SUB:    case ISD::MUL: @@ -366,42 +393,47 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {    case ISD::UMAX:    case ISD::SMUL_LOHI:    case ISD::UMUL_LOHI: -    QueryType = Node->getValueType(0); +  case ISD::FCANONICALIZE: +    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));      break;    case ISD::FP_ROUND_INREG: -    QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); +    Action = TLI.getOperationAction(Node->getOpcode(),  +               cast<VTSDNode>(Node->getOperand(1))->getVT());      break;    case ISD::SINT_TO_FP:    case ISD::UINT_TO_FP: -    QueryType = Node->getOperand(0).getValueType(); +    Action = TLI.getOperationAction(Node->getOpcode(),  +                                    Node->getOperand(0).getValueType());      break;    case ISD::MSCATTER: -    QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType(); +    Action = TLI.getOperationAction(Node->getOpcode(), +               cast<MaskedScatterSDNode>(Node)->getValue().getValueType());      break;    case ISD::MSTORE: -    QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType(); +    Action = TLI.getOperationAction(Node->getOpcode(), +               cast<MaskedStoreSDNode>(Node)->getValue().getValueType());      break;    } -  DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); +  LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); -  switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { +  switch (Action) {    default: llvm_unreachable("This action is not supported yet!");    case TargetLowering::Promote:      Result = Promote(Op);      Changed = true;      break;    case TargetLowering::Legal: -    DEBUG(dbgs() << "Legal node: nothing to do\n"); +    LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");      break;    case TargetLowering::Custom: { -    DEBUG(dbgs() << "Trying custom legalization\n"); +    LLVM_DEBUG(dbgs() << "Trying custom legalization\n");      if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { -      DEBUG(dbgs() << "Successfully custom legalized node\n"); +      LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");        Result = Tmp1;        break;      } -    DEBUG(dbgs() << "Could not custom legalize node\n"); +    LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");      LLVM_FALLTHROUGH;    }    case TargetLowering::Expand: @@ -649,9 +681,14 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {      Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);    } else {      SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); - -    NewChain = Scalarized.getValue(1); -    Value = Scalarized.getValue(0); +    // Skip past MERGE_VALUE node if known. +    if (Scalarized->getOpcode() == ISD::MERGE_VALUES) { +      NewChain = Scalarized.getOperand(1); +      Value = Scalarized.getOperand(0); +    } else { +      NewChain = Scalarized.getValue(1); +      Value = Scalarized.getValue(0); +    }    }    AddLegalizedOperand(Op.getValue(0), Value); @@ -662,35 +699,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {  SDValue VectorLegalizer::ExpandStore(SDValue Op) {    StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); - -  EVT StVT = ST->getMemoryVT(); -  EVT MemSclVT = StVT.getScalarType(); -  unsigned ScalarSize = MemSclVT.getSizeInBits(); - -  // Round odd types to the next pow of two. 
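One small fix worth flagging in the hunks a little further down: ExpandUINT_TO_FLOAT's constant 1 << (BW / 2) becomes 1ULL << (BW / 2). For BW = 64 the old form shifts a 32-bit int by 32 bits, which is undefined behavior; widening the literal keeps the shift inside a 64-bit type. A standalone illustration:

    #include <cassert>
    #include <cstdint>

    uint64_t twoToTheHalfWord(unsigned BW) {
      assert(BW >= 2 && BW <= 64 && "half width must fit in 64 bits");
      return 1ULL << (BW / 2);  // '1 << 32' in int would be UB for BW == 64
    }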
-  if (!isPowerOf2_32(ScalarSize)) { -    // FIXME: This is completely broken and inconsistent with ExpandLoad -    // handling. - -    // For sub-byte element sizes, this ends up with 0 stride between elements, -    // so the same element just gets re-written to the same location. There seem -    // to be tests explicitly testing for this broken behavior though.  tests -    // for this broken behavior. - -    LLVMContext &Ctx = *DAG.getContext(); - -    EVT NewMemVT -      = EVT::getVectorVT(Ctx, -                         MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)), -                         StVT.getVectorNumElements()); - -    SDValue NewVectorStore = DAG.getTruncStore( -        ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(), -        ST->getPointerInfo(), NewMemVT, ST->getAlignment(), -        ST->getMemOperand()->getFlags(), ST->getAAInfo()); -    ST = cast<StoreSDNode>(NewVectorStore.getNode()); -  } -    SDValue TF = TLI.scalarizeVectorStore(ST, DAG);    AddLegalizedOperand(Op, TF);    return TF; @@ -727,6 +735,24 @@ SDValue VectorLegalizer::Expand(SDValue Op) {      return ExpandCTLZ(Op);    case ISD::CTTZ_ZERO_UNDEF:      return ExpandCTTZ_ZERO_UNDEF(Op); +  case ISD::STRICT_FADD: +  case ISD::STRICT_FSUB:  +  case ISD::STRICT_FMUL: +  case ISD::STRICT_FDIV: +  case ISD::STRICT_FSQRT: +  case ISD::STRICT_FMA: +  case ISD::STRICT_FPOW: +  case ISD::STRICT_FPOWI: +  case ISD::STRICT_FSIN: +  case ISD::STRICT_FCOS: +  case ISD::STRICT_FEXP: +  case ISD::STRICT_FEXP2: +  case ISD::STRICT_FLOG: +  case ISD::STRICT_FLOG10: +  case ISD::STRICT_FLOG2: +  case ISD::STRICT_FRINT: +  case ISD::STRICT_FNEARBYINT: +    return ExpandStrictFPOp(Op);    default:      return DAG.UnrollVectorOp(Op.getNode());    } @@ -1020,7 +1046,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {    SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);    // Two to the power of half-word-size. -  SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType()); +  SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());    // Clear upper part of LO, lower HI    SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); @@ -1113,6 +1139,53 @@ SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {    return DAG.UnrollVectorOp(Op.getNode());  } +SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { +  EVT VT = Op.getValueType(); +  EVT EltVT = VT.getVectorElementType(); +  unsigned NumElems = VT.getVectorNumElements(); +  unsigned NumOpers = Op.getNumOperands(); +  const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +  EVT ValueVTs[] = {EltVT, MVT::Other}; +  SDValue Chain = Op.getOperand(0); +  SDLoc dl(Op); + +  SmallVector<SDValue, 32> OpValues; +  SmallVector<SDValue, 32> OpChains; +  for (unsigned i = 0; i < NumElems; ++i) { +    SmallVector<SDValue, 4> Opers; +    SDValue Idx = DAG.getConstant(i, dl,  +                                  TLI.getVectorIdxTy(DAG.getDataLayout())); + +    // The Chain is the first operand. +    Opers.push_back(Chain); + +    // Now process the remaining operands.  
+    for (unsigned j = 1; j < NumOpers; ++j) { +      SDValue Oper = Op.getOperand(j); +      EVT OperVT = Oper.getValueType(); + +      if (OperVT.isVector()) +        Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,  +                           EltVT, Oper, Idx); + +      Opers.push_back(Oper); +    } +  +    SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers); + +    OpValues.push_back(ScalarOp.getValue(0)); +    OpChains.push_back(ScalarOp.getValue(1)); +  } + +  SDValue Result = DAG.getBuildVector(VT, dl, OpValues); +  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); + +  AddLegalizedOperand(Op.getValue(0), Result); +  AddLegalizedOperand(Op.getValue(1), NewChain); + +  return NewChain; +} +  SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {    EVT VT = Op.getValueType();    unsigned NumElems = VT.getVectorNumElements(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index df1cbeb92740..1cd43ace48f3 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -33,9 +33,8 @@ using namespace llvm;  //===----------------------------------------------------------------------===//  void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { -  DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; -        N->dump(&DAG); -        dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG); +             dbgs() << "\n");    SDValue R = SDValue();    switch (N->getOpcode()) { @@ -169,9 +168,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,  }  SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { +  SDValue Op = N->getOperand(0); +  if (Op.getValueType().isVector() +      && Op.getValueType().getVectorNumElements() == 1 +      && !isSimpleLegalType(Op.getValueType())) +    Op = GetScalarizedVector(Op);    EVT NewVT = N->getValueType(0).getVectorElementType();    return DAG.getNode(ISD::BITCAST, SDLoc(N), -                     NewVT, N->getOperand(0)); +                     NewVT, Op);  }  SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { @@ -338,8 +342,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {        ScalarBool = TargetLowering::UndefinedBooleanContent;    } +  EVT CondVT = Cond.getValueType();    if (ScalarBool != VecBool) { -    EVT CondVT = Cond.getValueType();      switch (ScalarBool) {        case TargetLowering::UndefinedBooleanContent:          break; @@ -360,6 +364,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {      }    } +  // Truncate the condition if needed +  auto BoolVT = getSetCCResultType(CondVT); +  if (BoolVT.bitsLT(CondVT)) +    Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond); +    return DAG.getSelect(SDLoc(N),                         LHS.getValueType(), Cond, LHS,                         GetScalarizedVector(N->getOperand(2))); @@ -433,9 +442,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {  //===----------------------------------------------------------------------===//  bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { -  DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; -        N->dump(&DAG); -        dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); +             dbgs() << "\n");    SDValue 
Res = SDValue();    if (!Res.getNode()) { @@ -515,7 +523,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {                             N->getValueType(0).getScalarType(), Elt);    // Revectorize the result so the types line up with what the uses of this    // expression expect. -  return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op); +  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);  }  /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. @@ -618,9 +626,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {  /// invalid operands or may have other results that need legalization, we just  /// know that (at least) one result needs vector splitting.  void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { -  DEBUG(dbgs() << "Split node result: "; -        N->dump(&DAG); -        dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); dbgs() << "\n");    SDValue Lo, Hi;    // See if the target wants to custom expand this node. @@ -749,6 +755,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {    case ISD::FMA:      SplitVecRes_TernaryOp(N, Lo, Hi);      break; +  case ISD::STRICT_FADD: +  case ISD::STRICT_FSUB: +  case ISD::STRICT_FMUL: +  case ISD::STRICT_FDIV: +  case ISD::STRICT_FSQRT: +  case ISD::STRICT_FMA: +  case ISD::STRICT_FPOW: +  case ISD::STRICT_FPOWI: +  case ISD::STRICT_FSIN: +  case ISD::STRICT_FCOS: +  case ISD::STRICT_FEXP: +  case ISD::STRICT_FEXP2: +  case ISD::STRICT_FLOG: +  case ISD::STRICT_FLOG10: +  case ISD::STRICT_FLOG2: +  case ISD::STRICT_FRINT: +  case ISD::STRICT_FNEARBYINT: +    SplitVecRes_StrictFPOp(N, Lo, Hi); +    break;    }    // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1028,6 +1053,56 @@ void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,    Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi);  } +void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, +                                              SDValue &Hi) { +  unsigned NumOps = N->getNumOperands(); +  SDValue Chain = N->getOperand(0); +  EVT LoVT, HiVT; +  SDLoc dl(N); +  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + +  SmallVector<SDValue, 4> OpsLo; +  SmallVector<SDValue, 4> OpsHi; + +  // The Chain is the first operand. +  OpsLo.push_back(Chain); +  OpsHi.push_back(Chain); + +  // Now process the remaining operands.  +  for (unsigned i = 1; i < NumOps; ++i) { +    SDValue Op = N->getOperand(i);  +    SDValue OpLo = Op;  +    SDValue OpHi = Op;    + +    EVT InVT = Op.getValueType(); +    if (InVT.isVector()) {  +      // If the input also splits, handle it directly for a +      // compile time speedup. Otherwise split it by hand. +      if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) +        GetSplitVector(Op, OpLo, OpHi); +      else +        std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i); +    } + +    OpsLo.push_back(OpLo); +    OpsHi.push_back(OpHi); +  } + +  EVT LoValueVTs[] = {LoVT, MVT::Other}; +  EVT HiValueVTs[] = {HiVT, MVT::Other}; +  Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo); +  Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi); +   +  // Build a factor node to remember that this Op is independent of the +  // other one. 
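ExpandStrictFPOp above and SplitVecRes_StrictFPOp here follow the same recipe: feed every piece the incoming chain, run the pieces, then join the per-piece output chains with a TokenFactor so no lane's side effects are dropped. A toy standalone model of that recipe (chains as integers, the "token factor" modeled as a max):

    #include <algorithm>
    #include <cmath>
    #include <utility>
    #include <vector>

    struct Piece { double Val; int OutChain; };

    static Piece strictSqrt(double X, int Chain) {
      return {std::sqrt(X), Chain + 1};  // pretend the op advances a chain
    }

    static std::pair<std::vector<double>, int>
    unrollStrict(const std::vector<double> &Lanes, int InChain) {
      std::vector<double> Vals;
      int Joined = InChain;
      for (double X : Lanes) {
        Piece P = strictSqrt(X, InChain);       // each lane starts from InChain
        Vals.push_back(P.Val);
        Joined = std::max(Joined, P.OutChain);  // "TokenFactor" over all lanes
      }
      return {Vals, Joined};
    }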
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,  +                      Lo.getValue(1), Hi.getValue(1)); + +  // Legalize the chain result - switch anything that used the old chain to +  // use the new one. +  ReplaceValueWith(SDValue(N, 1), Chain); +} +  void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,                                                       SDValue &Hi) {    SDValue Vec = N->getOperand(0); @@ -1200,16 +1275,16 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,                                     MLD->isExpandingLoad()); +  unsigned HiOffset = LoMemVT.getStoreSize(); -  MMO = DAG.getMachineFunction(). -    getMachineMemOperand(MLD->getPointerInfo(), -                         MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(), -                         SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); +  MMO = DAG.getMachineFunction().getMachineMemOperand( +      MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad, +      HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), +      MLD->getRanges());    Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,                           ExtType, MLD->isExpandingLoad()); -    // Build a factor node to remember that this load is independent of the    // other one.    Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), @@ -1232,6 +1307,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,    SDValue Mask = MGT->getMask();    SDValue Src0 = MGT->getValue();    SDValue Index = MGT->getIndex(); +  SDValue Scale = MGT->getScale();    unsigned Alignment = MGT->getOriginalAlignment();    // Split Mask operand @@ -1263,11 +1339,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),                           Alignment, MGT->getAAInfo(), MGT->getRanges()); -  SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; +  SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale};    Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,                             MMO); -  SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; +  SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale};    Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,                             MMO); @@ -1365,8 +1441,8 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,      std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);      if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&          TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { -      DEBUG(dbgs() << "Split vector extend via incremental extend:"; -            N->dump(&DAG); dbgs() << "\n"); +      LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:"; +                 N->dump(&DAG); dbgs() << "\n");        // Extend the source vector by one step.        SDValue NewSrc =            DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); @@ -1501,9 +1577,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,  /// the node are known to be legal, but other operands of the node may need  /// legalization as well as the specified one.  
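The SplitVecRes_MLOAD hunk above carries the real correctness payload of this region: the Hi half of a split memory access touches memory LoMemVT.getStoreSize() bytes past the base, so its machine memory operand must say so, otherwise alias analysis reasons about the wrong bytes. A toy sketch of the pointer-info arithmetic (invented PointerInfo struct, not LLVM's MachinePointerInfo):

    struct PointerInfo {
      const void *Base;
      long Offset;
      PointerInfo getWithOffset(long O) const { return {Base, Offset + O}; }
    };

    // The Hi half starts where the Lo half's bytes end.
    PointerInfo hiHalfInfo(PointerInfo LoInfo, long LoStoreSize) {
      return LoInfo.getWithOffset(LoStoreSize);
    }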
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { -  DEBUG(dbgs() << "Split node operand: "; -        N->dump(&DAG); -        dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); dbgs() << "\n");    SDValue Res = SDValue();    // See if the target wants to custom split this node. @@ -1683,8 +1757,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {    // Use the appropriate scalar instruction on the split subvectors before    // reducing the now partially reduced smaller vector. -  SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi); -  return DAG.getNode(N->getOpcode(), dl, ResVT, Partial); +  SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags()); +  return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags());  }  SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { @@ -1810,6 +1884,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,    SDValue Ch = MGT->getChain();    SDValue Ptr = MGT->getBasePtr();    SDValue Index = MGT->getIndex(); +  SDValue Scale = MGT->getScale();    SDValue Mask = MGT->getMask();    SDValue Src0 = MGT->getValue();    unsigned Alignment = MGT->getOriginalAlignment(); @@ -1842,7 +1917,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),                           Alignment, MGT->getAAInfo(), MGT->getRanges()); -  SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; +  SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale};    SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,                                     OpsLo, MMO); @@ -1852,7 +1927,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,                           Alignment, MGT->getAAInfo(),                           MGT->getRanges()); -  SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; +  SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale};    SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,                                     OpsHi, MMO); @@ -1916,10 +1991,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,                                     N->isCompressingStore()); -  MMO = DAG.getMachineFunction(). 
-    getMachineMemOperand(N->getPointerInfo(), -                         MachineMemOperand::MOStore,  HiMemVT.getStoreSize(), -                         SecondHalfAlignment, N->getAAInfo(), N->getRanges()); +  unsigned HiOffset = LoMemVT.getStoreSize(); + +  MMO = DAG.getMachineFunction().getMachineMemOperand( +      N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, +      HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(), +      N->getRanges());    Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,                            N->isTruncatingStore(), N->isCompressingStore()); @@ -1935,6 +2012,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,    SDValue Ptr = N->getBasePtr();    SDValue Mask = N->getMask();    SDValue Index = N->getIndex(); +  SDValue Scale = N->getScale();    SDValue Data = N->getValue();    EVT MemoryVT = N->getMemoryVT();    unsigned Alignment = N->getOriginalAlignment(); @@ -1970,7 +2048,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,                           MachineMemOperand::MOStore, LoMemVT.getStoreSize(),                           Alignment, N->getAAInfo(), N->getRanges()); -  SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo}; +  SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};    Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),                              DL, OpsLo, MMO); @@ -1982,7 +2060,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,    // The order of the Scatter operation after split is well defined. The "Hi"    // part comes after the "Lo". So these two operations should be chained one    // after another. -  SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi}; +  SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};    return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),                                DL, OpsHi, MMO);  } @@ -2005,6 +2083,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {    EVT LoMemVT, HiMemVT;    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); +  // Scalarize if the split halves are not byte-sized. +  if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) +    return TLI.scalarizeVectorStore(N, DAG); +    unsigned IncrementSize = LoMemVT.getSizeInBits()/8;    if (isTruncating) @@ -2089,9 +2171,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {      return SplitVecOp_UnaryOp(N);    SDLoc DL(N); -  // Extract the halves of the input via extract_subvector. +  // Get the split input vector.    SDValue InLoVec, InHiVec; -  std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); +  GetSplitVector(InVec, InLoVec, InHiVec);    // Truncate them to 1/2 the element size.    EVT HalfElementVT = IsFloat ?      EVT::getFloatingPointVT(InElementSize/2) : @@ -2164,9 +2246,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {  //===----------------------------------------------------------------------===//  void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { -  DEBUG(dbgs() << "Widen node result " << ResNo << ": "; -        N->dump(&DAG); -        dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG); +             dbgs() << "\n");    // See if the target wants to custom widen this node.    
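The Scale operand threaded through the gather/scatter hunks above encodes the per-lane addressing rule addr = Base + Index[i] * Scale. A plain-C++ picture of those semantics (a scalar loop standing in for the vector node; masked-off lanes are left untouched, where the real node merges in a passthru vector):

    #include <cstddef>
    #include <cstdint>

    void gather(double *Out, const char *Base, const int64_t *Index,
                int64_t Scale, const bool *Mask, size_t N) {
      for (size_t i = 0; i != N; ++i)
        if (Mask[i])
          Out[i] = *reinterpret_cast<const double *>(Base + Index[i] * Scale);
    }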
if (CustomWidenLowerNode(N, N->getValueType(ResNo))) @@ -2948,6 +3029,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {    SDValue Mask = N->getMask();    EVT MaskVT = Mask.getValueType();    SDValue Src0 = GetWidenedVector(N->getValue()); +  SDValue Scale = N->getScale();    unsigned NumElts = WideVT.getVectorNumElements();    SDLoc dl(N); @@ -2963,7 +3045,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {                                       Index.getValueType().getScalarType(),                                       NumElts);    Index = ModifyToType(Index, WideIndexVT); -  SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; +  SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index, Scale };    SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),                                      N->getMemoryVT(), dl, Ops,                                      N->getMemOperand()); @@ -3309,9 +3391,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {  // Widen Vector Operand  //===----------------------------------------------------------------------===//  bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { -  DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; -        N->dump(&DAG); -        dbgs() << "\n"); +  LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG); +             dbgs() << "\n");    SDValue Res = SDValue();    // See if the target wants to custom widen this node. @@ -3420,7 +3501,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {    // low lanes.    switch (N->getOpcode()) {    default: -    llvm_unreachable("Extend legalization on on extend operation!"); +    llvm_unreachable("Extend legalization on extend operation!");    case ISD::ANY_EXTEND:      return DAG.getAnyExtendVectorInReg(InOp, DL, VT);    case ISD::SIGN_EXTEND: @@ -3544,6 +3625,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {    // vector type.    StoreSDNode *ST = cast<StoreSDNode>(N); +  if (!ST->getMemoryVT().getScalarType().isByteSized()) +    return TLI.scalarizeVectorStore(ST, DAG); +    SmallVector<SDValue, 16> StChain;    if (ST->isTruncatingStore())      GenWidenVectorTruncStores(StChain, ST); @@ -3587,6 +3671,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {    SDValue DataOp = MSC->getValue();    SDValue Mask = MSC->getMask();    EVT MaskVT = Mask.getValueType(); +  SDValue Scale = MSC->getScale();    // Widen the value.    SDValue WideVal = GetWidenedVector(DataOp); @@ -3606,7 +3691,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {                                       NumElts);    Index = ModifyToType(Index, WideIndexVT); -  SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index}; +  SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index, +                   Scale};    return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),                                MSC->getMemoryVT(), dl, Ops,                                MSC->getMemOperand()); @@ -3616,6 +3702,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {    SDValue InOp0 = GetWidenedVector(N->getOperand(0));    SDValue InOp1 = GetWidenedVector(N->getOperand(1));    SDLoc dl(N); +  EVT VT = N->getValueType(0);    // WARNING: In this code we widen the compare instruction with garbage.    // This garbage may contain denormal floats which may be slow. 
Is this a real @@ -3625,18 +3712,23 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {    // Only some of the compared elements are legal.    EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),                                     InOp0.getValueType()); +  // The result type is legal, if its vXi1, keep vXi1 for the new SETCC. +  if (VT.getScalarType() == MVT::i1) +    SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, +                           SVT.getVectorNumElements()); +    SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), -                     SVT, InOp0, InOp1, N->getOperand(2)); +                                  SVT, InOp0, InOp1, N->getOperand(2));    // Extract the needed results from the result vector.    EVT ResVT = EVT::getVectorVT(*DAG.getContext(),                                 SVT.getVectorElementType(), -                               N->getValueType(0).getVectorNumElements()); +                               VT.getVectorNumElements());    SDValue CC = DAG.getNode(        ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,        DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); -  return PromoteTargetBoolean(CC, N->getValueType(0)); +  return PromoteTargetBoolean(CC, VT);  } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index cf92907a8b5f..7e6b57426338 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -32,7 +32,8 @@ public:    enum DbgValueKind {      SDNODE = 0,             ///< Value is the result of an expression.      CONST = 1,              ///< Value is a constant. -    FRAMEIX = 2             ///< Value is contents of a stack location. +    FRAMEIX = 2,            ///< Value is contents of a stack location. +    VREG = 3                ///< Value is a virtual register.    };  private:    union { @@ -42,6 +43,7 @@ private:      } s;      const Value *Const;     ///< Valid for constants.      unsigned FrameIx;       ///< Valid for stack objects. +    unsigned VReg;          ///< Valid for registers.    } u;    DIVariable *Var;    DIExpression *Expr; @@ -69,12 +71,18 @@ public:      u.Const = C;    } -  /// Constructor for frame indices. -  SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, DebugLoc dl, -             unsigned O) -      : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) { -    kind = FRAMEIX; -    u.FrameIx = FI; +  /// Constructor for virtual registers and frame indices. +  SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned VRegOrFrameIdx, +             bool IsIndirect, DebugLoc DL, unsigned Order, +             enum DbgValueKind Kind) +      : Var(Var), Expr(Expr), DL(DL), Order(Order), IsIndirect(IsIndirect) { +    assert((Kind == VREG || Kind == FRAMEIX) && +           "Invalid SDDbgValue constructor"); +    kind = Kind; +    if (kind == VREG) +      u.VReg = VRegOrFrameIdx; +    else +      u.FrameIx = VRegOrFrameIdx;    }    /// Returns the kind. @@ -98,6 +106,9 @@ public:    /// Returns the FrameIx for a stack object    unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; } +  /// Returns the Virtual Register for a VReg +  unsigned getVReg() const { assert (kind==VREG); return u.VReg; } +    /// Returns whether this is an indirect value.    
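The SDDbgValue change above folds frame indices and virtual registers into one constructor over a tagged union, with kind-checked accessors. A condensed standalone model (simplified; the variable, expression, and debug-location fields are dropped):

    #include <cassert>

    class DbgValueLoc {
    public:
      enum Kind { FRAMEIX, VREG };

      DbgValueLoc(Kind K, unsigned VRegOrFrameIdx) : kind(K) {
        if (kind == VREG)
          u.VReg = VRegOrFrameIdx;
        else
          u.FrameIx = VRegOrFrameIdx;
      }

      unsigned getVReg() const { assert(kind == VREG); return u.VReg; }
      unsigned getFrameIx() const { assert(kind == FRAMEIX); return u.FrameIx; }

    private:
      Kind kind;
      union { unsigned VReg; unsigned FrameIx; } u;
    };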
bool isIndirect() const { return IsIndirect; } @@ -115,6 +126,28 @@ public:    bool isInvalidated() const { return Invalid; }  }; +/// Holds the information from a dbg_label node through SDISel. +/// We do not use SDValue here to avoid including its header. +class SDDbgLabel { +  MDNode *Label; +  DebugLoc DL; +  unsigned Order; + +public: +  SDDbgLabel(MDNode *Label, DebugLoc dl, unsigned O) +      : Label(Label), DL(std::move(dl)), Order(O) {} + +  /// Returns the MDNode pointer for the label. +  MDNode *getLabel() const { return Label; } + +  /// Returns the DebugLoc. +  DebugLoc getDebugLoc() const { return DL; } + +  /// Returns the SDNodeOrder.  This is the order of the preceding node in the +  /// input. +  unsigned getOrder() const { return Order; } +}; +  } // end llvm namespace  #endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 698e14453d1d..3944d7df286d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -13,6 +13,7 @@  #include "InstrEmitter.h"  #include "ScheduleDAGSDNodes.h" +#include "SDNodeDbgValue.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/SmallSet.h"  #include "llvm/ADT/Statistic.h" @@ -115,7 +116,7 @@ private:  /// Schedule - Schedule the DAG using list scheduling.  void ScheduleDAGFast::Schedule() { -  DEBUG(dbgs() << "********** List Scheduling **********\n"); +  LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");    NumLiveRegs = 0;    LiveRegDefs.resize(TRI->getNumRegs(), nullptr); @@ -124,8 +125,8 @@ void ScheduleDAGFast::Schedule() {    // Build the scheduling graph.    BuildSchedGraph(nullptr); -  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) -          SUnits[su].dumpAll(this)); +  LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su] +                 .dumpAll(this));    // Execute the actual scheduling loop.    ListScheduleBottomUp(); @@ -180,8 +181,8 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {  /// count of its predecessors. If a predecessor pending count is zero, add it to  /// the Available queue.  void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { -  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); -  DEBUG(SU->dump(this)); +  LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); +  LLVM_DEBUG(SU->dump(this));    assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");    SU->setHeightToAtLeast(CurCycle); @@ -236,7 +237,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {      if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))        return nullptr; -    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); +    LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");      assert(NewNodes.size() == 2 && "Expected a load folding node!");      N = NewNodes[1]; @@ -346,7 +347,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {      SU = NewSU;    } -  DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); +  LLVM_DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");    NewSU = Clone(SU);    // New SUnit has the exact same predecessors. @@ -592,14 +593,14 @@ void ScheduleDAGFast::ListScheduleBottomUp() {            // Issue copies, these can be expensive cross register class copies.            
SmallVector<SUnit*, 2> Copies;            InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); -          DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum -                       << " to SU #" << Copies.front()->NodeNum << "\n"); +          LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum +                            << " to SU #" << Copies.front()->NodeNum << "\n");            AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));            NewDef = Copies.back();          } -        DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum -                     << " to SU #" << TrySU->NodeNum << "\n"); +        LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum +                          << " to SU #" << TrySU->NodeNum << "\n");          LiveRegDefs[Reg] = NewDef;          AddPred(NewDef, SDep(TrySU, SDep::Artificial));          TrySU->isAvailable = false; @@ -666,8 +667,8 @@ void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {      // These nodes do not need to be translated into MIs.      return; -  DEBUG(dbgs() << "\n*** Scheduling: "); -  DEBUG(N->dump(DAG)); +  LLVM_DEBUG(dbgs() << "\n*** Scheduling: "); +  LLVM_DEBUG(N->dump(DAG));    Sequence.push_back(N);    unsigned NumOps = N->getNumOperands(); @@ -713,7 +714,7 @@ static SDNode *findGluedUser(SDNode *N) {  }  void ScheduleDAGLinearize::Schedule() { -  DEBUG(dbgs() << "********** DAG Linearization **********\n"); +  LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n");    SmallVector<SDNode*, 8> Glues;    unsigned DAGSize = 0; @@ -763,19 +764,29 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {    InstrEmitter Emitter(BB, InsertPos);    DenseMap<SDValue, unsigned> VRBaseMap; -  DEBUG({ -      dbgs() << "\n*** Final schedule ***\n"; -    }); +  LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); -  // FIXME: Handle dbg_values.    unsigned NumNodes = Sequence.size(); +  MachineBasicBlock *BB = Emitter.getBlock();    for (unsigned i = 0; i != NumNodes; ++i) {      SDNode *N = Sequence[NumNodes-i-1]; -    DEBUG(N->dump(DAG)); +    LLVM_DEBUG(N->dump(DAG));      Emitter.EmitNode(N, false, false, VRBaseMap); + +    // Emit any debug values associated with the node. 
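The emission loop introduced in this hunk retires the old "FIXME: Handle dbg_values": it walks each scheduled node's attached debug values, emits the live ones, and invalidates them so no later consumer emits a duplicate DBG_VALUE. The shape of that loop in isolation (toy types; the real EmitDbgValue can fail, hence the null check):

    #include <vector>

    struct DbgValue { bool Invalidated = false; };
    struct MachInstr {};  // stand-in for a MachineInstr

    static MachInstr *emitDbgValue(DbgValue &) {
      static MachInstr MI;
      return &MI;  // toy emitter; the real one may return nullptr
    }

    static void emitNodeDbgValues(std::vector<DbgValue> &DVs,
                                  std::vector<MachInstr *> &Block) {
      for (DbgValue &DV : DVs) {
        if (DV.Invalidated)
          continue;                  // already emitted elsewhere
        if (MachInstr *DbgMI = emitDbgValue(DV))
          Block.push_back(DbgMI);    // insert at the current position
        DV.Invalidated = true;       // never emit the same value twice
      }
    }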
+    if (N->getHasDebugValue()) { +      MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); +      for (auto DV : DAG->GetDbgValues(N)) { +        if (DV->isInvalidated()) +          continue; +        if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap)) +          BB->insert(InsertPos, DbgMI); +        DV->setIsInvalidated(); +      } +    }    } -  DEBUG(dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << '\n');    InsertPos = Emitter.getInsertPos();    return Emitter.getBlock(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 82337d43c5c9..43e8ffd3839c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -26,7 +26,6 @@  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/ScheduleDAG.h"  #include "llvm/CodeGen/ScheduleHazardRecognizer.h"  #include "llvm/CodeGen/SchedulerRegistry.h" @@ -37,6 +36,7 @@  #include "llvm/CodeGen/TargetOpcodes.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/InlineAsm.h"  #include "llvm/MC/MCInstrDesc.h"  #include "llvm/MC/MCRegisterInfo.h" @@ -46,6 +46,7 @@  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/raw_ostream.h"  #include <algorithm>  #include <cassert> @@ -346,8 +347,8 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,  /// Schedule - Schedule the DAG using list scheduling.  void ScheduleDAGRRList::Schedule() { -  DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) -               << " '" << BB->getName() << "' **********\n"); +  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) +                    << " '" << BB->getName() << "' **********\n");    CurCycle = 0;    IssueCount = 0; @@ -364,8 +365,7 @@ void ScheduleDAGRRList::Schedule() {    // Build the scheduling graph.    BuildSchedGraph(nullptr); -  DEBUG(for (SUnit &SU : SUnits) -          SU.dumpAll(this)); +  LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this));    Topo.InitDAGTopologicalSorting();    AvailableQueue->initNodes(SUnits); @@ -377,11 +377,11 @@ void ScheduleDAGRRList::Schedule() {    AvailableQueue->releaseState(); -  DEBUG({ -      dbgs() << "*** Final schedule ***\n"; -      dumpSchedule(); -      dbgs() << '\n'; -    }); +  LLVM_DEBUG({ +    dbgs() << "*** Final schedule ***\n"; +    dumpSchedule(); +    dbgs() << '\n'; +  });  }  //===----------------------------------------------------------------------===// @@ -728,13 +728,13 @@ static void resetVRegCycle(SUnit *SU);  /// count of its predecessors. If a predecessor pending count is zero, add it to  /// the Available queue.  
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { -  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); -  DEBUG(SU->dump(this)); +  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); +  LLVM_DEBUG(SU->dump(this));  #ifndef NDEBUG    if (CurCycle < SU->getHeight()) -    DEBUG(dbgs() << "   Height [" << SU->getHeight() -          << "] pipeline stall!\n"); +    LLVM_DEBUG(dbgs() << "   Height [" << SU->getHeight() +                      << "] pipeline stall!\n");  #endif    // FIXME: Do not modify node height. It may interfere with @@ -827,8 +827,8 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {  /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and  /// its predecessor states to reflect the change.  void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { -  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); -  DEBUG(SU->dump(this)); +  LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); +  LLVM_DEBUG(SU->dump(this));    for (SDep &Pred : SU->Preds) {      CapturePred(&Pred); @@ -1010,7 +1010,35 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {      computeLatency(LoadSU);    } -  DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); +  bool isNewN = true; +  SUnit *NewSU; +  // This can only happen when isNewLoad is false. +  if (N->getNodeId() != -1) { +    NewSU = &SUnits[N->getNodeId()]; +    // If NewSU has already been scheduled, we need to clone it, but this +    // negates the benefit to unfolding so just return SU. +    if (NewSU->isScheduled) +      return SU; +    isNewN = false; +  } else { +    NewSU = CreateNewSUnit(N); +    N->setNodeId(NewSU->NodeNum); + +    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); +    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { +      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { +        NewSU->isTwoAddress = true; +        break; +      } +    } +    if (MCID.isCommutable()) +      NewSU->isCommutable = true; + +    InitNumRegDefsLeft(NewSU); +    computeLatency(NewSU); +  } + +  LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");    // Now that we are committed to unfolding replace DAG Uses.    for (unsigned i = 0; i != NumVals; ++i) @@ -1018,23 +1046,6 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),                                   SDValue(LoadNode, 1)); -  SUnit *NewSU = CreateNewSUnit(N); -  assert(N->getNodeId() == -1 && "Node already inserted!"); -  N->setNodeId(NewSU->NodeNum); - -  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); -  for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { -    if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { -      NewSU->isTwoAddress = true; -      break; -    } -  } -  if (MCID.isCommutable()) -    NewSU->isCommutable = true; - -  InitNumRegDefsLeft(NewSU); -  computeLatency(NewSU); -    // Record all the edges to and from the old SU, by category.    
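The rework of TryUnfoldSU above hinges on one observation: the unfolded node N may already own a scheduling unit (NodeId != -1). In that case the unit is reused rather than recreated, and if it was already scheduled the whole unfolding is abandoned, since cloning a scheduled unit erases the benefit. A condensed sketch of that decision (toy types; returning nullptr stands for "give up and keep the original SU"):

    #include <vector>

    struct SUnit { bool isScheduled = false; };

    SUnit *unitForUnfoldedNode(int NodeId, std::vector<SUnit> &SUnits,
                               bool &IsNew) {
      if (NodeId != -1) {                       // node already has a unit
        IsNew = false;
        SUnit *SU = &SUnits[NodeId];
        return SU->isScheduled ? nullptr : SU;  // nullptr: keep original SU
      }
      IsNew = true;                             // fresh unit: the real code
      SUnits.emplace_back();                    // also sets flags and latency
      return &SUnits.back();
    }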
SmallVector<SDep, 4> ChainPreds;    SmallVector<SDep, 4> ChainSuccs; @@ -1100,7 +1111,8 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {    if (isNewLoad)      AvailableQueue->addNode(LoadSU); -  AvailableQueue->addNode(NewSU); +  if (isNewN) +    AvailableQueue->addNode(NewSU);    ++NumUnfolds; @@ -1117,12 +1129,13 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {    if (!N)      return nullptr; -  DEBUG(dbgs() << "Considering duplicating the SU\n"); -  DEBUG(SU->dump(this)); +  LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n"); +  LLVM_DEBUG(SU->dump(this));    if (N->getGluedNode() &&        !TII->canCopyGluedNodeDuringSchedule(N)) { -    DEBUG(dbgs() +    LLVM_DEBUG( +        dbgs()          << "Giving up because it has incoming glue and the target does not "             "want to copy it\n");      return nullptr; @@ -1133,7 +1146,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {    for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {      MVT VT = N->getSimpleValueType(i);      if (VT == MVT::Glue) { -      DEBUG(dbgs() << "Giving up because it has outgoing glue\n"); +      LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");        return nullptr;      } else if (VT == MVT::Other)        TryUnfold = true; @@ -1141,8 +1154,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {    for (const SDValue &Op : N->op_values()) {      MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());      if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) { -      DEBUG(dbgs() << "Giving up because it one of the operands is glue and " -                      "the target does not want to copy it\n"); +      LLVM_DEBUG( +          dbgs() << "Giving up because it one of the operands is glue and " +                    "the target does not want to copy it\n");        return nullptr;      }    } @@ -1159,7 +1173,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {        return SU;    } -  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n"); +  LLVM_DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");    NewSU = CreateClone(SU);    // New SUnit has the exact same predecessors. @@ -1420,7 +1434,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {      // Furthermore, it may have been made available again, in which case it is      // now already in the AvailableQueue.      
if (SU->isAvailable && !SU->NodeQueueId) { -      DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n'); +      LLVM_DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');        AvailableQueue->push(SU);      }      if (i < Interferences.size()) @@ -1441,12 +1455,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {        SmallVector<unsigned, 4> LRegs;        if (!DelayForLiveRegsBottomUp(CurSU, LRegs))          break; -      DEBUG(dbgs() << "    Interfering reg "; -            if (LRegs[0] == TRI->getNumRegs()) -              dbgs() << "CallResource"; -            else -              dbgs() << printReg(LRegs[0], TRI); -            dbgs() << " SU #" << CurSU->NodeNum << '\n'); +      LLVM_DEBUG(dbgs() << "    Interfering reg "; +                 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource"; +                 else dbgs() << printReg(LRegs[0], TRI); +                 dbgs() << " SU #" << CurSU->NodeNum << '\n');        std::pair<LRegsMapT::iterator, bool> LRegsPair =          LRegsMap.insert(std::make_pair(CurSU, LRegs));        if (LRegsPair.second) { @@ -1492,17 +1504,17 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {          if (!BtSU->isPending)            AvailableQueue->remove(BtSU);        } -      DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" -            << TrySU->NodeNum << ")\n"); +      LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum +                        << ") to SU(" << TrySU->NodeNum << ")\n");        AddPred(TrySU, SDep(BtSU, SDep::Artificial));        // If one or more successors has been unscheduled, then the current        // node is no longer available.        if (!TrySU->isAvailable || !TrySU->NodeQueueId) { -        DEBUG(dbgs() << "TrySU not available; choosing node from queue\n"); +        LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");          CurSU = AvailableQueue->pop();        } else { -        DEBUG(dbgs() << "TrySU available\n"); +        LLVM_DEBUG(dbgs() << "TrySU available\n");          // Available and in AvailableQueue          AvailableQueue->remove(TrySU);          CurSU = TrySU; @@ -1546,14 +1558,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {        // Issue copies, these can be expensive cross register class copies.        SmallVector<SUnit*, 2> Copies;        InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); -      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum -            << " to SU #" << Copies.front()->NodeNum << "\n"); +      LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum +                        << " to SU #" << Copies.front()->NodeNum << "\n");        AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));        NewDef = Copies.back();      } -    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum -          << " to SU #" << TrySU->NodeNum << "\n"); +    LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum +                      << " to SU #" << TrySU->NodeNum << "\n");      LiveRegDefs[Reg] = NewDef;      AddPred(NewDef, SDep(TrySU, SDep::Artificial));      TrySU->isAvailable = false; @@ -1581,8 +1593,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {    // priority. If it is not ready put it back.  Schedule the node.    
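The loop this comment introduces is classic bottom-up list scheduling: while anything is available, pick the best candidate, schedule it, and let its predecessors become available. A bare skeleton of the control flow (toy priority queue; the real loop also resolves live-register interferences before committing to a pick):

    #include <queue>
    #include <vector>

    struct SU { unsigned Num; int Priority; };
    struct ByPriority {
      bool operator()(const SU &A, const SU &B) const {
        return A.Priority < B.Priority;
      }
    };

    std::vector<unsigned> listScheduleBottomUp(
        std::priority_queue<SU, std::vector<SU>, ByPriority> Avail) {
      std::vector<unsigned> Sequence;
      while (!Avail.empty()) {
        SU Best = Avail.top();         // best node under all heuristics
        Avail.pop();
        Sequence.push_back(Best.Num);  // emitted bottom-up; reversed later
        // scheduling Best would release its predecessors into Avail here
      }
      return Sequence;
    }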
Sequence.reserve(SUnits.size());    while (!AvailableQueue->empty() || !Interferences.empty()) { -    DEBUG(dbgs() << "\nExamining Available:\n"; -          AvailableQueue->dump(this)); +    LLVM_DEBUG(dbgs() << "\nExamining Available:\n"; +               AvailableQueue->dump(this));      // Pick the best node to schedule taking all constraints into      // consideration. @@ -2045,8 +2057,8 @@ LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {      unsigned Id = RC->getID();      unsigned RP = RegPressure[Id];      if (!RP) continue; -    DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " -          << RegLimit[Id] << '\n'); +    LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " +                      << RegLimit[Id] << '\n');    }  }  #endif @@ -2198,14 +2210,15 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) {      if (RegPressure[RCId] < Cost) {        // Register pressure tracking is imprecise. This can happen. But we try        // hard not to let it happen because it likely results in poor scheduling. -      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n"); +      LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum +                        << ") has too many regdefs\n");        RegPressure[RCId] = 0;      }      else {        RegPressure[RCId] -= Cost;      }    } -  DEBUG(dumpRegPressure()); +  LLVM_DEBUG(dumpRegPressure());  }  void RegReductionPQBase::unscheduledNode(SUnit *SU) { @@ -2285,7 +2298,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {      }    } -  DEBUG(dumpRegPressure()); +  LLVM_DEBUG(dumpRegPressure());  }  //===----------------------------------------------------------------------===// @@ -2380,7 +2393,7 @@ static void initVRegCycle(SUnit *SU) {    if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))      return; -  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); +  LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");    SU->isVRegCycle = true; @@ -2418,7 +2431,7 @@ static bool hasVRegCycleUse(const SUnit *SU) {      if (Pred.isCtrl()) continue;  // ignore chain preds      if (Pred.getSUnit()->isVRegCycle &&          Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { -      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n"); +      LLVM_DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");        return true;      }    } @@ -2478,9 +2491,9 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,      int LDepth = left->getDepth() - LPenalty;      int RDepth = right->getDepth() - RPenalty;      if (LDepth != RDepth) { -      DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum -            << ") depth " << LDepth << " vs SU (" << right->NodeNum -            << ") depth " << RDepth << "\n"); +      LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum +                        << ") depth " << LDepth << " vs SU (" << right->NodeNum +                        << ") depth " << RDepth << "\n");        return LDepth < RDepth ? 
1 : -1;      }      if (left->Latency != right->Latency) @@ -2502,9 +2515,9 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {        static const char *const PhysRegMsg[] = { " has no physreg",                                                  " defines a physreg" };        #endif -      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") " -            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " -            << PhysRegMsg[RHasPhysReg] << "\n"); +      LLVM_DEBUG(dbgs() << "  SU (" << left->NodeNum << ") " +                        << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum +                        << ") " << PhysRegMsg[RHasPhysReg] << "\n");        return LHasPhysReg < RHasPhysReg;      }    } @@ -2648,13 +2661,13 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {    // Avoid causing spills. If register pressure is high, schedule for    // register pressure reduction.    if (LHigh && !RHigh) { -    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU(" -          << right->NodeNum << ")\n"); +    LLVM_DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU(" +                      << right->NodeNum << ")\n");      return true;    }    else if (!LHigh && RHigh) { -    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU(" -          << left->NodeNum << ")\n"); +    LLVM_DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU(" +                      << left->NodeNum << ")\n");      return false;    }    if (!LHigh && !RHigh) { @@ -2716,8 +2729,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {      RPDiff = SPQ->RegPressureDiff(right, RLiveUses);    }    if (!DisableSchedRegPressure && LPDiff != RPDiff) { -    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff -          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); +    LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum +                      << "): " << LPDiff << " != SU(" << right->NodeNum +                      << "): " << RPDiff << "\n");      return LPDiff > RPDiff;    } @@ -2729,8 +2743,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {    }    if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { -    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses -          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); +    LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses +                      << " != SU(" << right->NodeNum << "): " << RLiveUses +                      << "\n");      return LLiveUses < RLiveUses;    } @@ -2744,9 +2759,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {    if (!DisableSchedCriticalPath) {      int spread = (int)left->getDepth() - (int)right->getDepth();      if (std::abs(spread) > MaxReorderWindow) { -      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " -            << left->getDepth() << " != SU(" << right->NodeNum << "): " -            << right->getDepth() << "\n"); +      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " +                        << left->getDepth() << " != SU(" << right->NodeNum +                        << "): " << right->getDepth() << "\n");        return left->getDepth() < right->getDepth();      }    } @@ -2967,9 +2982,10 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {      // Ok, the transformation is safe and the heuristics suggest it is      // profitable. Update the graph. 
-    DEBUG(dbgs() << "    Prescheduling SU #" << SU.NodeNum -                 << " next to PredSU #" << PredSU->NodeNum -                 << " to guide scheduling in the presence of multiple uses\n"); +    LLVM_DEBUG( +        dbgs() << "    Prescheduling SU #" << SU.NodeNum << " next to PredSU #" +               << PredSU->NodeNum +               << " to guide scheduling in the presence of multiple uses\n");      for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {        SDep Edge = PredSU->Succs[i];        assert(!Edge.isAssignedRegDep()); @@ -3058,8 +3074,9 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {               (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||               (!SU.isCommutable && SuccSU->isCommutable)) &&              !scheduleDAG->IsReachable(SuccSU, &SU)) { -          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #" -                       << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); +          LLVM_DEBUG(dbgs() +                     << "    Adding a pseudo-two-addr edge from SU #" +                     << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");            scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));          }        } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index c09b47af26a6..430d8fb34476 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -27,6 +27,7 @@  #include "llvm/CodeGen/TargetLowering.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/MC/MCInstrItineraries.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" @@ -243,7 +244,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {      return;    // Sort them in increasing order. -  std::sort(Offsets.begin(), Offsets.end()); +  llvm::sort(Offsets.begin(), Offsets.end());    // Check if the loads are close enough.    SmallVector<SDNode*, 4> Loads; @@ -910,6 +911,39 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {      MachineBasicBlock *InsertBB = Emitter.getBlock();      MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();      InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); + +    SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin(); +    SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd(); +    // Now emit the rest according to source order. +    LastOrder = 0; +    for (const auto &InstrOrder : Orders) { +      unsigned Order = InstrOrder.first; +      MachineInstr *MI = InstrOrder.second; +      if (!MI) +        continue; + +      // Insert all SDDbgLabel's whose order(s) are before "Order". +      for (; DLI != DLE && +             (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order; +             ++DLI) { +        MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI); +        if (DbgMI) { +          if (!LastOrder) +            // Insert to start of the BB (after PHIs). +            BB->insert(BBBegin, DbgMI); +          else { +            // Insert at the instruction, which may be in a different +            // block, if the block was split by a custom inserter. 
+            MachineBasicBlock::iterator Pos = MI; +            MI->getParent()->insert(Pos, DbgMI); +          } +        } +      } +      if (DLI == DLE) +        break; + +      LastOrder = Order; +    }    }    InsertPos = Emitter.getInsertPos(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index a058942c5689..6417e16bd0fd 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -17,10 +17,10 @@  #include "llvm/CodeGen/ISDOpcodes.h"  #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/ScheduleDAG.h"  #include "llvm/CodeGen/SelectionDAGNodes.h"  #include "llvm/Support/Casting.h" +#include "llvm/Support/MachineValueType.h"  #include <cassert>  #include <string>  #include <vector> @@ -88,7 +88,7 @@ class InstrItineraryData;      /// Clone - Creates a clone of the specified SUnit. It does not copy the      /// predecessors / successors info nor the temporary scheduling states.      /// -    SUnit *Clone(SUnit *N); +    SUnit *Clone(SUnit *Old);      /// BuildSchedGraph - Build the SUnit graph from the selection dag that we      /// are input.  This SUnit graph is similar to the SelectionDAG, but diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 07b46b9183ab..84055f8ecc1a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -93,8 +93,8 @@ private:  /// Schedule - Schedule the DAG using list scheduling.  void ScheduleDAGVLIW::Schedule() { -  DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) -               << " '" << BB->getName() << "' **********\n"); +  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) +                    << " '" << BB->getName() << "' **********\n");    // Build the scheduling graph.    BuildSchedGraph(AA); @@ -151,8 +151,8 @@ void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {  /// count of its successors. If a successor pending count is zero, add it to  /// the Available queue.  void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { -  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); -  DEBUG(SU->dump(this)); +  LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); +  LLVM_DEBUG(SU->dump(this));    Sequence.push_back(SU);    assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); @@ -246,7 +246,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {      } else if (!HasNoopHazards) {        // Otherwise, we have a pipeline stall, but no other problem, just advance        // the current cycle and try again. -      DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); +      LLVM_DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");        HazardRec->AdvanceCycle();        ++NumStalls;        ++CurCycle; @@ -254,7 +254,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {        // Otherwise, we have no instructions to issue and we have instructions        // that will fault if we don't do this right.  This is the case for        // processors without pipeline interlocks and other cases. 
-      DEBUG(dbgs() << "*** Emitting noop\n"); +      LLVM_DEBUG(dbgs() << "*** Emitting noop\n");        HazardRec->EmitNoop();        Sequence.push_back(nullptr);   // NULL here means noop        ++NumNoops; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 3ffc6fa9a059..48e03c6da68f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -32,7 +32,6 @@  #include "llvm/CodeGen/MachineFrameInfo.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/RuntimeLibcalls.h"  #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"  #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -58,6 +57,7 @@  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/ManagedStatic.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/Mutex.h" @@ -89,11 +89,16 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}  #define DEBUG_TYPE "selectiondag" +static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt", +       cl::Hidden, cl::init(true), +       cl::desc("Gang up loads and stores generated by inlining of memcpy")); + +static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max", +       cl::desc("Number limit for gluing ld/st of memcpy."), +       cl::Hidden, cl::init(0)); +  static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) { -  DEBUG( -    dbgs() << Msg; -    V.getNode()->dump(G); -  ); +  LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G););  }  //===----------------------------------------------------------------------===// @@ -263,6 +268,52 @@ bool ISD::allOperandsUndef(const SDNode *N) {    return true;  } +bool ISD::matchUnaryPredicate(SDValue Op, +                              std::function<bool(ConstantSDNode *)> Match) { +  if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) +    return Match(Cst); + +  if (ISD::BUILD_VECTOR != Op.getOpcode()) +    return false; + +  EVT SVT = Op.getValueType().getScalarType(); +  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { +    auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); +    if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) +      return false; +  } +  return true; +} + +bool ISD::matchBinaryPredicate( +    SDValue LHS, SDValue RHS, +    std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) { +  if (LHS.getValueType() != RHS.getValueType()) +    return false; + +  if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) +    if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) +      return Match(LHSCst, RHSCst); + +  if (ISD::BUILD_VECTOR != LHS.getOpcode() || +      ISD::BUILD_VECTOR != RHS.getOpcode()) +    return false; + +  EVT SVT = LHS.getValueType().getScalarType(); +  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { +    auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i)); +    auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i)); +    if (!LHSCst || !RHSCst) +      return false; +    if (LHSCst->getValueType(0) != SVT || +        LHSCst->getValueType(0) != RHSCst->getValueType(0)) +      return false; +    if (!Match(LHSCst, RHSCst)) +      return false; +  } +  return true; +} +  ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {    switch (ExtType) {    case 
ISD::EXTLOAD: @@ -487,12 +538,41 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {      ID.AddInteger(ST->getPointerInfo().getAddrSpace());      break;    } +  case ISD::MLOAD: { +    const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); +    ID.AddInteger(MLD->getMemoryVT().getRawBits()); +    ID.AddInteger(MLD->getRawSubclassData()); +    ID.AddInteger(MLD->getPointerInfo().getAddrSpace()); +    break; +  } +  case ISD::MSTORE: { +    const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); +    ID.AddInteger(MST->getMemoryVT().getRawBits()); +    ID.AddInteger(MST->getRawSubclassData()); +    ID.AddInteger(MST->getPointerInfo().getAddrSpace()); +    break; +  } +  case ISD::MGATHER: { +    const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N); +    ID.AddInteger(MG->getMemoryVT().getRawBits()); +    ID.AddInteger(MG->getRawSubclassData()); +    ID.AddInteger(MG->getPointerInfo().getAddrSpace()); +    break; +  } +  case ISD::MSCATTER: { +    const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N); +    ID.AddInteger(MS->getMemoryVT().getRawBits()); +    ID.AddInteger(MS->getRawSubclassData()); +    ID.AddInteger(MS->getPointerInfo().getAddrSpace()); +    break; +  }    case ISD::ATOMIC_CMP_SWAP:    case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:    case ISD::ATOMIC_SWAP:    case ISD::ATOMIC_LOAD_ADD:    case ISD::ATOMIC_LOAD_SUB:    case ISD::ATOMIC_LOAD_AND: +  case ISD::ATOMIC_LOAD_CLR:    case ISD::ATOMIC_LOAD_OR:    case ISD::ATOMIC_LOAD_XOR:    case ISD::ATOMIC_LOAD_NAND: @@ -726,7 +806,7 @@ static void VerifySDNode(SDNode *N) {  }  #endif // NDEBUG -/// \brief Insert a newly allocated node into the DAG. +/// Insert a newly allocated node into the DAG.  ///  /// Handles insertion into the all nodes list and CSE map, as well as  /// verification and other common operations when a new node is allocated. 
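Editor's sketch (illustrative, not part of the patch): one way a combine could use the new ISD::matchUnaryPredicate helper added above. The names Amt and BitWidth are hypothetical stand-ins for values taken from the surrounding fold.

  // Accept the fold only if every constant lane of the shift amount is
  // strictly smaller than the bit width; this covers both a lone
  // ConstantSDNode and an all-constant BUILD_VECTOR in one query.
  auto IsInBounds = [BitWidth](ConstantSDNode *C) {
    return C->getAPIntValue().ult(BitWidth);
  };
  if (ISD::matchUnaryPredicate(Amt, IsInBounds)) {
    // ... safe to rewrite the shift ...
  }

matchBinaryPredicate works the same way, testing matching (LHS, RHS) constant pairs lane by lane.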
@@ -903,13 +983,16 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)  void SelectionDAG::init(MachineFunction &NewMF,                          OptimizationRemarkEmitter &NewORE, -                        Pass *PassPtr) { +                        Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, +                        DivergenceAnalysis * Divergence) {    MF = &NewMF;    SDAGISelPass = PassPtr;    ORE = &NewORE;    TLI = getSubtarget().getTargetLowering();    TSI = getSubtarget().getSelectionDAGInfo(); +  LibInfo = LibraryInfo;    Context = &MF->getFunction().getContext(); +  DA = Divergence;  }  SelectionDAG::~SelectionDAG() { @@ -1077,21 +1160,25 @@ SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {  }  SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { -  EVT EltVT = VT.getScalarType(); -  SDValue TrueValue; -  switch (TLI->getBooleanContents(VT)) { -    case TargetLowering::ZeroOrOneBooleanContent: -    case TargetLowering::UndefinedBooleanContent: -      TrueValue = getConstant(1, DL, VT); -      break; -    case TargetLowering::ZeroOrNegativeOneBooleanContent: -      TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, -                              VT); -      break; -  } +  SDValue TrueValue = getBoolConstant(true, DL, VT, VT);    return getNode(ISD::XOR, DL, VT, Val, TrueValue);  } +SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, +                                      EVT OpVT) { +  if (!V) +    return getConstant(0, DL, VT); + +  switch (TLI->getBooleanContents(OpVT)) { +  case TargetLowering::ZeroOrOneBooleanContent: +  case TargetLowering::UndefinedBooleanContent: +    return getConstant(1, DL, VT); +  case TargetLowering::ZeroOrNegativeOneBooleanContent: +    return getAllOnesConstant(DL, VT); +  } +  llvm_unreachable("Unexpected boolean content enum!"); +} +  SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,                                    bool isT, bool isO) {    EVT EltVT = VT.getScalarType(); @@ -1184,7 +1271,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,        return SDValue(N, 0);    if (!N) { -    N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT); +    N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT);      CSEMap.InsertNode(N, IP);      InsertNode(N);      NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); @@ -1227,7 +1314,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,        return SDValue(N, 0);    if (!N) { -    N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT); +    N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT);      CSEMap.InsertNode(N, IP);      InsertNode(N);    } @@ -1503,33 +1590,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,    if (N1.isUndef())      commuteShuffle(N1, N2, MaskVec); -  // If shuffling a splat, try to blend the splat instead. We do this here so -  // that even when this arises during lowering we don't have to re-handle it. -  auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { -    BitVector UndefElements; -    SDValue Splat = BV->getSplatValue(&UndefElements); -    if (!Splat) -      return; +  if (TLI->hasVectorBlend()) { +    // If shuffling a splat, try to blend the splat instead. We do this here so +    // that even when this arises during lowering we don't have to re-handle it. 
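// Editor's note (worked example, added): with NElts = 4 and N2 a splat
// (Offset = 4), a mask lane MaskVec[i] = 5 reads lane 1 of the splat; since
// every splat lane holds the same value, the lane can be rewritten to i + 4,
// turning an arbitrary pick from the splat into a blend-friendly mask.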
+    auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { +      BitVector UndefElements; +      SDValue Splat = BV->getSplatValue(&UndefElements); +      if (!Splat) +        return; -    for (int i = 0; i < NElts; ++i) { -      if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) -        continue; +      for (int i = 0; i < NElts; ++i) { +        if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) +          continue; -      // If this input comes from undef, mark it as such. -      if (UndefElements[MaskVec[i] - Offset]) { -        MaskVec[i] = -1; -        continue; -      } +        // If this input comes from undef, mark it as such. +        if (UndefElements[MaskVec[i] - Offset]) { +          MaskVec[i] = -1; +          continue; +        } -      // If we can blend a non-undef lane, use that instead. -      if (!UndefElements[i]) -        MaskVec[i] = i + Offset; -    } -  }; -  if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) -    BlendSplat(N1BV, 0); -  if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) -    BlendSplat(N2BV, NElts); +        // If we can blend a non-undef lane, use that instead. +        if (!UndefElements[i]) +          MaskVec[i] = i + Offset; +      } +    }; +    if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) +      BlendSplat(N1BV, 0); +    if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) +      BlendSplat(N2BV, NElts); +  }    // Canonicalize all index into lhs, -> shuffle lhs, undef    // Canonicalize all index into rhs, -> shuffle rhs, undef @@ -1643,7 +1732,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,  }  SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { -  MVT VT = SV.getSimpleValueType(0); +  EVT VT = SV.getValueType(0);    SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());    ShuffleVectorSDNode::commuteMask(MaskVec); @@ -1661,6 +1750,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {      return SDValue(E, 0);    auto *N = newSDNode<RegisterSDNode>(RegNo, VT); +  N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);    CSEMap.InsertNode(N, IP);    InsertNode(N);    return SDValue(N, 0); @@ -1870,19 +1960,15 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {  SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,                                  ISD::CondCode Cond, const SDLoc &dl) { +  EVT OpVT = N1.getValueType(); +    // These setcc operations always fold.    switch (Cond) {    default: break;    case ISD::SETFALSE: -  case ISD::SETFALSE2: return getConstant(0, dl, VT); +  case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT);    case ISD::SETTRUE: -  case ISD::SETTRUE2: { -    TargetLowering::BooleanContent Cnt = -        TLI->getBooleanContents(N1->getValueType(0)); -    return getConstant( -        Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? 
-1ULL : 1, dl, -        VT); -  } +  case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT);    case ISD::SETOEQ:    case ISD::SETOGT: @@ -1905,16 +1991,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,        switch (Cond) {        default: llvm_unreachable("Unknown integer setcc!"); -      case ISD::SETEQ:  return getConstant(C1 == C2, dl, VT); -      case ISD::SETNE:  return getConstant(C1 != C2, dl, VT); -      case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT); -      case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT); -      case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT); -      case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT); -      case ISD::SETLT:  return getConstant(C1.slt(C2), dl, VT); -      case ISD::SETGT:  return getConstant(C1.sgt(C2), dl, VT); -      case ISD::SETLE:  return getConstant(C1.sle(C2), dl, VT); -      case ISD::SETGE:  return getConstant(C1.sge(C2), dl, VT); +      case ISD::SETEQ:  return getBoolConstant(C1 == C2, dl, VT, OpVT); +      case ISD::SETNE:  return getBoolConstant(C1 != C2, dl, VT, OpVT); +      case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT); +      case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT); +      case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT); +      case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT); +      case ISD::SETLT:  return getBoolConstant(C1.slt(C2), dl, VT, OpVT); +      case ISD::SETGT:  return getBoolConstant(C1.sgt(C2), dl, VT, OpVT); +      case ISD::SETLE:  return getBoolConstant(C1.sle(C2), dl, VT, OpVT); +      case ISD::SETGE:  return getBoolConstant(C1.sge(C2), dl, VT, OpVT);        }      }    } @@ -1926,41 +2012,54 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,        case ISD::SETEQ:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          LLVM_FALLTHROUGH; -      case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT); +      case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, +                                               OpVT);        case ISD::SETNE:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          LLVM_FALLTHROUGH; -      case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || -                                           R==APFloat::cmpLessThan, dl, VT); +      case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || +                                               R==APFloat::cmpLessThan, dl, VT, +                                               OpVT);        case ISD::SETLT:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          LLVM_FALLTHROUGH; -      case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT); +      case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, +                                               OpVT);        case ISD::SETGT:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          LLVM_FALLTHROUGH; -      case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT); +      case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, +                                               VT, OpVT);        case ISD::SETLE:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          LLVM_FALLTHROUGH; - 
     case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || -                                           R==APFloat::cmpEqual, dl, VT); +      case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || +                                               R==APFloat::cmpEqual, dl, VT, +                                               OpVT);        case ISD::SETGE:  if (R==APFloat::cmpUnordered)                            return getUNDEF(VT);                          LLVM_FALLTHROUGH; -      case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || -                                           R==APFloat::cmpEqual, dl, VT); -      case ISD::SETO:   return getConstant(R!=APFloat::cmpUnordered, dl, VT); -      case ISD::SETUO:  return getConstant(R==APFloat::cmpUnordered, dl, VT); -      case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || -                                           R==APFloat::cmpEqual, dl, VT); -      case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT); -      case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || -                                           R==APFloat::cmpLessThan, dl, VT); -      case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || -                                           R==APFloat::cmpUnordered, dl, VT); -      case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT); -      case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT); +      case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || +                                           R==APFloat::cmpEqual, dl, VT, OpVT); +      case ISD::SETO:   return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, +                                               OpVT); +      case ISD::SETUO:  return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, +                                               OpVT); +      case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || +                                               R==APFloat::cmpEqual, dl, VT, +                                               OpVT); +      case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, +                                               OpVT); +      case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || +                                               R==APFloat::cmpLessThan, dl, VT, +                                               OpVT); +      case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || +                                               R==APFloat::cmpUnordered, dl, VT, +                                               OpVT); +      case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, +                                               VT, OpVT); +      case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, +                                               OpVT);        }      } else {        // Ensure that the constant occurs on the RHS. @@ -2297,10 +2396,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,        break;      } -    // Support big-endian targets when it becomes useful.      bool IsLE = getDataLayout().isLittleEndian(); -    if (!IsLE) -      break;      // Bitcast 'small element' vector to 'large element' scalar/vector.      
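// Editor's note (worked example, added): for a v4i8 -> i32 bitcast,
// SubScale = 4 and SubBitWidth = 8. Little-endian places sub-element i at
// bit offset 8 * i; big-endian places sub-element 0 in the most significant
// byte, i.e. at offset 8 * (SubScale - 1 - i) = 24 for i = 0. That is the
// Shifts value computed below, which lets this path handle big-endian
// targets instead of bailing out.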
if ((BitWidth % SubBitWidth) == 0) { @@ -2319,8 +2415,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,        for (unsigned i = 0; i != SubScale; ++i) {          computeKnownBits(N0, Known2, SubDemandedElts.shl(i),                           Depth + 1); -        Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i); -        Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i); +        unsigned Shifts = IsLE ? i : SubScale - 1 - i; +        Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts); +        Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts);        }      } @@ -2342,7 +2439,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,        Known.Zero.setAllBits(); Known.One.setAllBits();        for (unsigned i = 0; i != NumElts; ++i)          if (DemandedElts[i]) { -          unsigned Offset = (i % SubScale) * BitWidth; +          unsigned Shifts = IsLE ? i : NumElts - 1 - i; +          unsigned Offset = (Shifts % SubScale) * BitWidth;            Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);            Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);            // If we don't know any bits, early out. @@ -2441,6 +2539,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,      break;    case ISD::SMULO:    case ISD::UMULO: +  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:      if (Op.getResNo() != 1)        break;      // The boolean result conforms to getBooleanContents. @@ -2904,11 +3003,38 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,    }    case ISD::SMIN:    case ISD::SMAX: { -    computeKnownBits(Op.getOperand(0), Known, DemandedElts, -                     Depth + 1); -    // If we don't know any bits, early out. -    if (Known.isUnknown()) -      break; +    // If we have a clamp pattern, we know that the number of sign bits will be +    // the minimum of the clamp min/max range. +    bool IsMax = (Opcode == ISD::SMAX); +    ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; +    if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) +      if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) +        CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), +                                              DemandedElts); +    if (CstLow && CstHigh) { +      if (!IsMax) +        std::swap(CstLow, CstHigh); + +      const APInt &ValueLow = CstLow->getAPIntValue(); +      const APInt &ValueHigh = CstHigh->getAPIntValue(); +      if (ValueLow.sle(ValueHigh)) { +        unsigned LowSignBits = ValueLow.getNumSignBits(); +        unsigned HighSignBits = ValueHigh.getNumSignBits(); +        unsigned MinSignBits = std::min(LowSignBits, HighSignBits); +        if (ValueLow.isNegative() && ValueHigh.isNegative()) { +          Known.One.setHighBits(MinSignBits); +          break; +        } +        if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) { +          Known.Zero.setHighBits(MinSignBits); +          break; +        } +      } +    } + +    // Fallback - just get the shared known bits of the operands. 
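// Editor's note (worked example for the clamp path above, added): on i32,
// smax(smin(x, 100), 16) clamps x to [16, 100]. Both bounds are
// non-negative, 16 has 27 sign bits and 100 has 25, so MinSignBits = 25 and
// the top 25 bits of the result become known zero.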
+    computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); +    if (Known.isUnknown()) break; // Early-out      computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);      Known.Zero &= Known2.Zero;      Known.One &= Known2.One; @@ -3038,7 +3164,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,    if (!DemandedElts)      return 1;  // No demanded elts, better to assume we don't know anything. -  switch (Op.getOpcode()) { +  unsigned Opcode = Op.getOpcode(); +  switch (Opcode) {    default: break;    case ISD::AssertSext:      Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); @@ -3189,7 +3316,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,      return std::min(Tmp, Tmp2);    case ISD::SMIN: -  case ISD::SMAX: +  case ISD::SMAX: { +    // If we have a clamp pattern, we know that the number of sign bits will be +    // the minimum of the clamp min/max range. +    bool IsMax = (Opcode == ISD::SMAX); +    ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; +    if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) +      if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) +        CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), +                                              DemandedElts); +    if (CstLow && CstHigh) { +      if (!IsMax) +        std::swap(CstLow, CstHigh); +      if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) { +        Tmp = CstLow->getAPIntValue().getNumSignBits(); +        Tmp2 = CstHigh->getAPIntValue().getNumSignBits(); +        return std::min(Tmp, Tmp2); +      } +    } + +    // Fallback - just get the minimum number of sign bits of the operands. +    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); +    if (Tmp == 1) +      return 1;  // Early out. +    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); +    return std::min(Tmp, Tmp2); +  }    case ISD::UMIN:    case ISD::UMAX:      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); @@ -3225,7 +3377,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,        unsigned RotAmt = C->getAPIntValue().urem(VTBits);        // Handle rotate right by N like a rotate left by 32-N. -      if (Op.getOpcode() == ISD::ROTR) +      if (Opcode == ISD::ROTR)          RotAmt = (VTBits - RotAmt) % VTBits;        // If we aren't rotating out all of the known-in sign bits, return the @@ -3423,10 +3575,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,    }    // Allow the target to implement this method for its nodes. 
-  if (Op.getOpcode() >= ISD::BUILTIN_OP_END || -      Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || -      Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || -      Op.getOpcode() == ISD::INTRINSIC_VOID) { +  if (Opcode >= ISD::BUILTIN_OP_END || +      Opcode == ISD::INTRINSIC_WO_CHAIN || +      Opcode == ISD::INTRINSIC_W_CHAIN || +      Opcode == ISD::INTRINSIC_VOID) {      unsigned NumBits =          TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);      if (NumBits > 1) @@ -3487,17 +3639,33 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {    return false;  } -bool SelectionDAG::isKnownNeverZero(SDValue Op) const { +bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const { +  assert(Op.getValueType().isFloatingPoint() && +         "Floating point type expected"); +    // If the value is a constant, we can obviously see if it is a zero or not. +  // TODO: Add BuildVector support.    if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))      return !C->isZero(); +  return false; +} + +bool SelectionDAG::isKnownNeverZero(SDValue Op) const { +  assert(!Op.getValueType().isFloatingPoint() && +         "Floating point types unsupported - use isKnownNeverZeroFloat"); + +  // If the value is a constant, we can obviously see if it is a zero or not. +  if (ISD::matchUnaryPredicate( +          Op, [](ConstantSDNode *C) { return !C->isNullValue(); })) +    return true;    // TODO: Recognize more cases here.    switch (Op.getOpcode()) {    default: break;    case ISD::OR: -    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) -      return !C->isNullValue(); +    if (isKnownNeverZero(Op.getOperand(1)) || +        isKnownNeverZero(Op.getOperand(0))) +      return true;      break;    } @@ -3517,6 +3685,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {    return false;  } +// FIXME: unify with llvm::haveNoCommonBitsSet. +// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M)  bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {    assert(A.getValueType() == B.getValueType() &&           "Values must have the same type"); @@ -3841,11 +4011,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      else if (OpOpcode == ISD::UNDEF)        return getUNDEF(VT); -    // (ext (trunx x)) -> x +    // (ext (trunc x)) -> x      if (OpOpcode == ISD::TRUNCATE) {        SDValue OpOp = Operand.getOperand(0); -      if (OpOp.getValueType() == VT) +      if (OpOp.getValueType() == VT) { +        transferDbgValues(Operand, OpOp);          return OpOp; +      }      }      break;    case ISD::TRUNCATE: @@ -3921,10 +4093,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      break;    case ISD::FNEG:      // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 -    if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) -      // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? 
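// Editor's note (added): the nsz (or global unsafe-fp-math) requirement in
// the condition below is not optional: with X == Y == +0.0, X - Y == +0.0 and
// -(X - Y) == -0.0, while the rewritten Y - X == +0.0, so the fold may flip
// the sign of a zero result.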
+    if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && +        OpOpcode == ISD::FSUB)        return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), -                     Operand.getOperand(0), Operand.getNode()->getFlags()); +                     Operand.getOperand(0), Flags);      if (OpOpcode == ISD::FNEG)  // --X -> X        return Operand.getOperand(0);      break; @@ -4314,24 +4486,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,    case ISD::FMUL:    case ISD::FDIV:    case ISD::FREM: -    if (getTarget().Options.UnsafeFPMath) { -      if (Opcode == ISD::FADD) { -        // x+0 --> x -        if (N2CFP && N2CFP->getValueAPF().isZero()) -          return N1; -      } else if (Opcode == ISD::FSUB) { -        // x-0 --> x -        if (N2CFP && N2CFP->getValueAPF().isZero()) -          return N1; -      } else if (Opcode == ISD::FMUL) { -        // x*0 --> 0 -        if (N2CFP && N2CFP->isZero()) -          return N2; -        // x*1 --> x -        if (N2CFP && N2CFP->isExactlyValue(1.0)) -          return N1; -      } -    }      assert(VT.isFloatingPoint() && "This operator only applies to FP types!");      assert(N1.getValueType() == N2.getValueType() &&             N1.getValueType() == VT && "Binary operator types must match!"); @@ -4448,12 +4602,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      break;    }    case ISD::EXTRACT_VECTOR_ELT: +    assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() && +           "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \ +             element type of the vector."); +      // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.      if (N1.isUndef())        return getUNDEF(VT);      // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF -    if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements()) +    if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements()))        return getUNDEF(VT);      // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is @@ -4635,6 +4793,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      }    } +  // Any FP binop with an undef operand is folded to NaN. This matches the +  // behavior of the IR optimizer. +  switch (Opcode) { +  case ISD::FADD: +  case ISD::FSUB: +  case ISD::FMUL: +  case ISD::FDIV: +  case ISD::FREM: +    if (N1.isUndef() || N2.isUndef()) +      return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); +  } +    // Canonicalize an UNDEF to the RHS, even over a constant.    if (N1.isUndef()) {      if (TLI->isCommutativeBinOp(Opcode)) { @@ -4644,22 +4814,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,        case ISD::FP_ROUND_INREG:        case ISD::SIGN_EXTEND_INREG:        case ISD::SUB: -      case ISD::FSUB: -      case ISD::FDIV: -      case ISD::FREM: -      case ISD::SRA: -        return N1;     // fold op(undef, arg2) -> undef +        return getUNDEF(VT);     // fold op(undef, arg2) -> undef        case ISD::UDIV:        case ISD::SDIV:        case ISD::UREM:        case ISD::SREM: +      case ISD::SRA:        case ISD::SRL:        case ISD::SHL: -        if (!VT.isVector()) -          return getConstant(0, DL, VT);    // fold op(undef, arg2) -> 0 -        // For vectors, we can't easily build an all zero vector, just return -        // the LHS. 
-        return N2; +        return getConstant(0, DL, VT);    // fold op(undef, arg2) -> 0        }      }    } @@ -4681,32 +4844,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      case ISD::SDIV:      case ISD::UREM:      case ISD::SREM: -      return N2;       // fold op(arg1, undef) -> undef -    case ISD::FADD: -    case ISD::FSUB: -    case ISD::FMUL: -    case ISD::FDIV: -    case ISD::FREM: -      if (getTarget().Options.UnsafeFPMath) -        return N2; -      break; -    case ISD::MUL: -    case ISD::AND: +    case ISD::SRA:      case ISD::SRL:      case ISD::SHL: -      if (!VT.isVector()) -        return getConstant(0, DL, VT);  // fold op(arg1, undef) -> 0 -      // For vectors, we can't easily build an all zero vector, just return -      // the LHS. -      return N1; +      return getUNDEF(VT);       // fold op(arg1, undef) -> undef +    case ISD::MUL: +    case ISD::AND: +      return getConstant(0, DL, VT);  // fold op(arg1, undef) -> 0      case ISD::OR: -      if (!VT.isVector()) -        return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT); -      // For vectors, we can't easily build an all one vector, just return -      // the LHS. -      return N1; -    case ISD::SRA: -      return N1; +      return getAllOnesConstant(DL, VT);      }    } @@ -4739,10 +4885,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,  }  SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, -                              SDValue N1, SDValue N2, SDValue N3) { +                              SDValue N1, SDValue N2, SDValue N3, +                              const SDNodeFlags Flags) {    // Perform various simplifications.    switch (Opcode) {    case ISD::FMA: { +    assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); +    assert(N1.getValueType() == VT && N2.getValueType() == VT && +           N3.getValueType() == VT && "FMA types must match!");      ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);      ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);      ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); @@ -4833,10 +4983,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,      FoldingSetNodeID ID;      AddNodeIDNode(ID, Opcode, VTs, Ops);      void *IP = nullptr; -    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) +    if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { +      E->intersectFlagsWith(Flags);        return SDValue(E, 0); +    }      N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); +    N->setFlags(Flags);      createOperands(N, Ops);      CSEMap.InsertNode(N, IP);    } else { @@ -5107,6 +5260,31 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {    return MF.getFunction().optForSize();  } +static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, +                          SmallVector<SDValue, 32> &OutChains, unsigned From, +                          unsigned To, SmallVector<SDValue, 16> &OutLoadChains, +                          SmallVector<SDValue, 16> &OutStoreChains) { +  assert(OutLoadChains.size() && "Missing loads in memcpy inlining"); +  assert(OutStoreChains.size() && "Missing stores in memcpy inlining"); +  SmallVector<SDValue, 16> GluedLoadChains; +  for (unsigned i = From; i < To; ++i) { +    OutChains.push_back(OutLoadChains[i]); +    GluedLoadChains.push_back(OutLoadChains[i]); +  } + +  // Chain for all loads. 
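// Editor's note (added): TokenFactor merges the given chains into a single
// token that is reached only once all of them complete; re-chaining every
// store below onto LoadToken guarantees that no store of this gang can be
// scheduled before any of the gang's loads.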
+  SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, +                                  GluedLoadChains); + +  for (unsigned i = From; i < To; ++i) { +    StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]); +    SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(), +                                  ST->getBasePtr(), ST->getMemoryVT(), +                                  ST->getMemOperand()); +    OutChains.push_back(NewStore); +  } +} +  static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,                                        SDValue Chain, SDValue Dst, SDValue Src,                                        uint64_t Size, unsigned Align, @@ -5171,7 +5349,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,    MachineMemOperand::Flags MMOFlags =        isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; -  SmallVector<SDValue, 8> OutChains; +  SmallVector<SDValue, 16> OutLoadChains; +  SmallVector<SDValue, 16> OutStoreChains; +  SmallVector<SDValue, 32> OutChains;    unsigned NumMemOps = MemOps.size();    uint64_t SrcOff = 0, DstOff = 0;    for (unsigned i = 0; i != NumMemOps; ++i) { @@ -5205,11 +5385,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,          SubSlice.Length = VTSize;        }        Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); -      if (Value.getNode()) +      if (Value.getNode()) {          Store = DAG.getStore(Chain, dl, Value,                               DAG.getMemBasePlusOffset(Dst, DstOff, dl),                               DstPtrInfo.getWithOffset(DstOff), Align,                               MMOFlags); +        OutChains.push_back(Store); +      }      }      if (!Store.getNode()) { @@ -5231,17 +5413,61 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,                               DAG.getMemBasePlusOffset(Src, SrcOff, dl),                               SrcPtrInfo.getWithOffset(SrcOff), VT,                               MinAlign(SrcAlign, SrcOff), SrcMMOFlags); -      OutChains.push_back(Value.getValue(1)); +      OutLoadChains.push_back(Value.getValue(1)); +        Store = DAG.getTruncStore(            Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),            DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); +      OutStoreChains.push_back(Store);      } -    OutChains.push_back(Store);      SrcOff += VTSize;      DstOff += VTSize;      Size -= VTSize;    } +  unsigned GluedLdStLimit = MaxLdStGlue == 0 ? +                                TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue; +  unsigned NumLdStInMemcpy = OutStoreChains.size(); + +  if (NumLdStInMemcpy) { +    // The memcpy may have been converted to a memset if it copies constants; +    // in that case there are only stores and no loads, and with no loads +    // there is nothing to gang up. +    if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) { +      // If the target does not care, just leave the chains as they are. +      for (unsigned i = 0; i < NumLdStInMemcpy; ++i) { +        OutChains.push_back(OutLoadChains[i]); +        OutChains.push_back(OutStoreChains[i]); +      } +    } else { +      // Ld/St count is within the limit set by the target.
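// Editor's note (worked example, added): when the count exceeds the limit
// (else-branch below), e.g. NumLdStInMemcpy = 10 with GluedLdStLimit = 4,
// NumberLdChain = 2 and RemainingLdStInMemcpy = 2: the loop gangs indices
// [6, 10) and then [2, 6), and the residual call gangs [0, 2), so every
// load/store pair lands in exactly one gang.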
+      if (NumLdStInMemcpy <= GluedLdStLimit) { +          chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, +                                        NumLdStInMemcpy, OutLoadChains, +                                        OutStoreChains); +      } else { +        unsigned NumberLdChain =  NumLdStInMemcpy / GluedLdStLimit; +        unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit; +        unsigned GlueIter = 0; + +        for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) { +          unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit; +          unsigned IndexTo   = NumLdStInMemcpy - GlueIter; + +          chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo, +                                       OutLoadChains, OutStoreChains); +          GlueIter += GluedLdStLimit; +        } + +        // Residual ld/st. +        if (RemainingLdStInMemcpy) { +          chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, +                                        RemainingLdStInMemcpy, OutLoadChains, +                                        OutStoreChains); +        } +      } +    } +  }    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);  } @@ -5334,7 +5560,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);  } -/// \brief Lower the call to 'memset' intrinsic function into a series of store +/// Lower the call to 'memset' intrinsic function into a series of store  /// operations.  ///  /// \param DAG Selection DAG where lowered code is placed. @@ -5518,6 +5744,47 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,    return CallResult.second;  } +SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, +                                      SDValue Dst, unsigned DstAlign, +                                      SDValue Src, unsigned SrcAlign, +                                      SDValue Size, Type *SizeTy, +                                      unsigned ElemSz, bool isTailCall, +                                      MachinePointerInfo DstPtrInfo, +                                      MachinePointerInfo SrcPtrInfo) { +  // Emit a library call. 
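// Editor's note (added): RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC maps the
// element size to a specific libcall, e.g. ElemSz == 4 resolves to
// __llvm_memcpy_element_unordered_atomic_4; unsupported sizes yield
// UNKNOWN_LIBCALL and hit the report_fatal_error below.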
+  TargetLowering::ArgListTy Args; +  TargetLowering::ArgListEntry Entry; +  Entry.Ty = getDataLayout().getIntPtrType(*getContext()); +  Entry.Node = Dst; +  Args.push_back(Entry); + +  Entry.Node = Src; +  Args.push_back(Entry); + +  Entry.Ty = SizeTy; +  Entry.Node = Size; +  Args.push_back(Entry); + +  RTLIB::Libcall LibraryCall = +      RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz); +  if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) +    report_fatal_error("Unsupported element size"); + +  TargetLowering::CallLoweringInfo CLI(*this); +  CLI.setDebugLoc(dl) +      .setChain(Chain) +      .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), +                    Type::getVoidTy(*getContext()), +                    getExternalSymbol(TLI->getLibcallName(LibraryCall), +                                      TLI->getPointerTy(getDataLayout())), +                    std::move(Args)) +      .setDiscardResult() +      .setTailCall(isTailCall); + +  std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); +  return CallResult.second; +} +  SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,                                   SDValue Src, SDValue Size, unsigned Align,                                   bool isVol, bool isTailCall, @@ -5579,6 +5846,47 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,    return CallResult.second;  } +SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, +                                       SDValue Dst, unsigned DstAlign, +                                       SDValue Src, unsigned SrcAlign, +                                       SDValue Size, Type *SizeTy, +                                       unsigned ElemSz, bool isTailCall, +                                       MachinePointerInfo DstPtrInfo, +                                       MachinePointerInfo SrcPtrInfo) { +  // Emit a library call. 
+  TargetLowering::ArgListTy Args; +  TargetLowering::ArgListEntry Entry; +  Entry.Ty = getDataLayout().getIntPtrType(*getContext()); +  Entry.Node = Dst; +  Args.push_back(Entry); + +  Entry.Node = Src; +  Args.push_back(Entry); + +  Entry.Ty = SizeTy; +  Entry.Node = Size; +  Args.push_back(Entry); + +  RTLIB::Libcall LibraryCall = +      RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz); +  if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) +    report_fatal_error("Unsupported element size"); + +  TargetLowering::CallLoweringInfo CLI(*this); +  CLI.setDebugLoc(dl) +      .setChain(Chain) +      .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), +                    Type::getVoidTy(*getContext()), +                    getExternalSymbol(TLI->getLibcallName(LibraryCall), +                                      TLI->getPointerTy(getDataLayout())), +                    std::move(Args)) +      .setDiscardResult() +      .setTailCall(isTailCall); + +  std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); +  return CallResult.second; +} +  SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,                                  SDValue Src, SDValue Size, unsigned Align,                                  bool isVol, bool isTailCall, @@ -5641,6 +5949,46 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,    return CallResult.second;  } +SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, +                                      SDValue Dst, unsigned DstAlign, +                                      SDValue Value, SDValue Size, Type *SizeTy, +                                      unsigned ElemSz, bool isTailCall, +                                      MachinePointerInfo DstPtrInfo) { +  // Emit a library call. 
+  TargetLowering::ArgListTy Args; +  TargetLowering::ArgListEntry Entry; +  Entry.Ty = getDataLayout().getIntPtrType(*getContext()); +  Entry.Node = Dst; +  Args.push_back(Entry); + +  Entry.Ty = Type::getInt8Ty(*getContext()); +  Entry.Node = Value; +  Args.push_back(Entry); + +  Entry.Ty = SizeTy; +  Entry.Node = Size; +  Args.push_back(Entry); + +  RTLIB::Libcall LibraryCall = +      RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz); +  if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) +    report_fatal_error("Unsupported element size"); + +  TargetLowering::CallLoweringInfo CLI(*this); +  CLI.setDebugLoc(dl) +      .setChain(Chain) +      .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), +                    Type::getVoidTy(*getContext()), +                    getExternalSymbol(TLI->getLibcallName(LibraryCall), +                                      TLI->getPointerTy(getDataLayout())), +                    std::move(Args)) +      .setDiscardResult() +      .setTailCall(isTailCall); + +  std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); +  return CallResult.second; +} +  SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,                                  SDVTList VTList, ArrayRef<SDValue> Ops,                                  MachineMemOperand *MMO) { @@ -5736,6 +6084,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,    assert((Opcode == ISD::ATOMIC_LOAD_ADD ||            Opcode == ISD::ATOMIC_LOAD_SUB ||            Opcode == ISD::ATOMIC_LOAD_AND || +          Opcode == ISD::ATOMIC_LOAD_CLR ||            Opcode == ISD::ATOMIC_LOAD_OR ||            Opcode == ISD::ATOMIC_LOAD_XOR ||            Opcode == ISD::ATOMIC_LOAD_NAND || @@ -6207,7 +6556,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,  SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,                                        ArrayRef<SDValue> Ops,                                        MachineMemOperand *MMO) { -  assert(Ops.size() == 5 && "Incompatible number of operands"); +  assert(Ops.size() == 6 && "Incompatible number of operands");    FoldingSetNodeID ID;    AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); @@ -6233,6 +6582,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,    assert(N->getIndex().getValueType().getVectorNumElements() ==               N->getValueType(0).getVectorNumElements() &&           "Vector width mismatch between index and data"); +  assert(isa<ConstantSDNode>(N->getScale()) && +         cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && +         "Scale should be a constant power of 2");    CSEMap.InsertNode(N, IP);    InsertNode(N); @@ -6244,7 +6596,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,  SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,                                         ArrayRef<SDValue> Ops,                                         MachineMemOperand *MMO) { -  assert(Ops.size() == 5 && "Incompatible number of operands"); +  assert(Ops.size() == 6 && "Incompatible number of operands");    FoldingSetNodeID ID;    AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); @@ -6267,6 +6619,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,    assert(N->getIndex().getValueType().getVectorNumElements() ==               N->getValue().getValueType().getVectorNumElements() &&           "Vector width mismatch between index and data"); +  
assert(isa<ConstantSDNode>(N->getScale()) && +         cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && +         "Scale should be a constant power of 2");    CSEMap.InsertNode(N, IP);    InsertNode(N); @@ -6558,6 +6913,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {    // Now we update the operands.    N->OperandList[0].set(Op); +  updateDivergence(N);    // If this gets put into a CSE map, add it.    if (InsertPos) CSEMap.InsertNode(N, InsertPos);    return N; @@ -6586,6 +6942,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {    if (N->OperandList[1] != Op2)      N->OperandList[1].set(Op2); +  updateDivergence(N);    // If this gets put into a CSE map, add it.    if (InsertPos) CSEMap.InsertNode(N, InsertPos);    return N; @@ -6636,6 +6993,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {      if (N->OperandList[i] != Ops[i])        N->OperandList[i].set(Ops[i]); +  updateDivergence(N);    // If this gets put into a CSE map, add it.    if (InsertPos) CSEMap.InsertNode(N, InsertPos);    return N; @@ -7061,11 +7419,24 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var,  /// FrameIndex  SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,                                                  DIExpression *Expr, unsigned FI, +                                                bool IsIndirect,                                                  const DebugLoc &DL,                                                  unsigned O) {    assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&           "Expected inlined-at fields to agree"); -  return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O); +  return new (DbgInfo->getAlloc()) +      SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX); +} + +/// VReg +SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, +                                          DIExpression *Expr, +                                          unsigned VReg, bool IsIndirect, +                                          const DebugLoc &DL, unsigned O) { +  assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && +         "Expected inlined-at fields to agree"); +  return new (DbgInfo->getAlloc()) +      SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG);  }  void SelectionDAG::transferDbgValues(SDValue From, SDValue To, @@ -7155,8 +7526,9 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {                          DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());          ClonedDVs.push_back(Clone);          DV->setIsInvalidated(); -        DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); -              dbgs() << " into " << *DIExpr << '\n'); +        LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; +                   N0.getNode()->dumprFull(this); +                   dbgs() << " into " << *DIExpr << '\n');        }      }    } @@ -7165,6 +7537,14 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {      AddDbgValue(Dbg, Dbg->getSDNode(), false);  } +/// Creates a SDDbgLabel node. 
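A side note on the gather/scatter changes above: both node builders now take six operands instead of five, the new one being a Scale that the added asserts require to be a constant power of two (a power of two keeps the index scaling expressible as a shift). The check itself is the usual single-bit-set test; a standalone equivalent over a plain integer instead of a ConstantSDNode/APInt:

    #include <cassert>
    #include <cstdint>

    // Equivalent of APInt::isPowerOf2() for the constant Scale operand:
    // nonzero and exactly one bit set.
    bool isConstantPowerOf2Scale(uint64_t Scale) {
      return Scale != 0 && (Scale & (Scale - 1)) == 0;
    }

    int main() {
      assert(isConstantPowerOf2Scale(1) && isConstantPowerOf2Scale(8));
      // A scale of 12 would trip the "Scale should be a constant power of 2"
      // assertion in getMaskedGather/getMaskedScatter above.
      assert(!isConstantPowerOf2Scale(12));
      return 0;
    }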
+SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label, +                                      const DebugLoc &DL, unsigned O) { +  assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && +         "Expected inlined-at fields to agree"); +  return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O); +} +  namespace {  /// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node @@ -7227,8 +7607,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {        SDUse &Use = UI.getUse();        ++UI;        Use.set(To); +      if (To->isDivergent() != From->isDivergent()) +        updateDivergence(User);      } while (UI != UE && *UI == User); -      // Now that we have modified User, add it back to the CSE maps.  If it      // already exists there, recursively merge the results together.      AddModifiedNodeToCSEMaps(User); @@ -7282,6 +7663,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {        SDUse &Use = UI.getUse();        ++UI;        Use.setNode(To); +      if (To->isDivergent() != From->isDivergent()) +        updateDivergence(User);      } while (UI != UE && *UI == User);      // Now that we have modified User, add it back to the CSE maps.  If it @@ -7326,8 +7709,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {        const SDValue &ToOp = To[Use.getResNo()];        ++UI;        Use.set(ToOp); +      if (To->getNode()->isDivergent() != From->isDivergent()) +        updateDivergence(User);      } while (UI != UE && *UI == User); -      // Now that we have modified User, add it back to the CSE maps.  If it      // already exists there, recursively merge the results together.      AddModifiedNodeToCSEMaps(User); @@ -7385,8 +7769,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){        ++UI;        Use.set(To); +      if (To->isDivergent() != From->isDivergent()) +        updateDivergence(User);      } while (UI != UE && *UI == User); -      // We are iterating over all uses of the From node, so if a use      // doesn't use the specific value, no changes are made.      
if (!UserRemovedFromCSEMaps)
@@ -7419,6 +7804,72 @@ namespace {
 } // end anonymous namespace
 
+void SelectionDAG::updateDivergence(SDNode *N)
+{
+  if (TLI->isSDNodeAlwaysUniform(N))
+    return;
+  bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+  for (auto &Op : N->ops()) {
+    if (Op.Val.getValueType() != MVT::Other)
+      IsDivergent |= Op.getNode()->isDivergent();
+  }
+  if (N->SDNodeBits.IsDivergent != IsDivergent) {
+    N->SDNodeBits.IsDivergent = IsDivergent;
+    for (auto U : N->uses()) {
+      updateDivergence(U);
+    }
+  }
+}
+
+
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
+  DenseMap<SDNode *, unsigned> Degree;
+  Order.reserve(AllNodes.size());
+  for (auto &N : allnodes()) {
+    unsigned NOps = N.getNumOperands();
+    Degree[&N] = NOps;
+    if (0 == NOps)
+      Order.push_back(&N);
+  }
+  for (std::vector<SDNode *>::iterator I = Order.begin();
+       I != Order.end(); ++I) {
+    SDNode *N = *I;
+    for (auto U : N->uses()) {
+      unsigned &UnsortedOps = Degree[U];
+      if (0 == --UnsortedOps)
+        Order.push_back(U);
+    }
+  }
+}
+
+void SelectionDAG::VerifyDAGDiverence()
+{
+  std::vector<SDNode *> TopoOrder;
+  CreateTopologicalOrder(TopoOrder);
+  const TargetLowering &TLI = getTargetLoweringInfo();
+  DenseMap<const SDNode *, bool> DivergenceMap;
+  for (auto &N : allnodes()) {
+    DivergenceMap[&N] = false;
+  }
+  for (auto N : TopoOrder) {
+    bool IsDivergent = DivergenceMap[N];
+    bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA);
+    for (auto &Op : N->ops()) {
+      if (Op.Val.getValueType() != MVT::Other)
+        IsSDNodeDivergent |= DivergenceMap[Op.getNode()];
+    }
+    if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) {
+      DivergenceMap[N] = true;
+    }
+  }
+  for (auto &N : allnodes()) {
+    (void)N;
+    assert(DivergenceMap[&N] == N.isDivergent() &&
+           "Divergence bit inconsistency detected\n");
+  }
+}
+
 /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
 /// uses of other values produced by From.getNode() alone.  The same value
 /// may appear in both the From and To list.  The Deleted vector is
@@ -7450,7 +7901,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
   }
 
   // Sort the uses, so that all the uses from a given User are together.
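CreateTopologicalOrder above is Kahn's algorithm specialized to the DAG: nodes with no operands seed the order, and each user is appended once the count of its not-yet-emitted operands drops to zero, with Order itself serving as the worklist. The same idea over a plain adjacency list, as a runnable reference (the graph shape here is assumed, not the SelectionDAG types):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Kahn's algorithm: Edges[U] lists the users of U (like SDNode::uses()),
    // and Indegree[U] plays the role of the Degree map (operand counts).
    std::vector<size_t>
    topologicalOrder(const std::vector<std::vector<size_t>> &Edges) {
      std::vector<size_t> Indegree(Edges.size(), 0);
      for (const auto &Users : Edges)
        for (size_t U : Users)
          ++Indegree[U];

      std::vector<size_t> Order;
      Order.reserve(Edges.size());
      for (size_t N = 0; N != Edges.size(); ++N)
        if (Indegree[N] == 0) // operand-less nodes seed the order
          Order.push_back(N);

      // As in CreateTopologicalOrder, Order doubles as the worklist: it only
      // grows, so indexing into it while appending is safe.
      for (size_t I = 0; I != Order.size(); ++I)
        for (size_t U : Edges[Order[I]])
          if (--Indegree[U] == 0)
            Order.push_back(U);
      return Order;
    }

    int main() {
      // Nodes 0 and 1 feed node 2, which feeds node 3.
      std::vector<std::vector<size_t>> Edges = {{2}, {2}, {3}, {}};
      for (size_t N : topologicalOrder(Edges))
        std::cout << N << ' '; // prints: 0 1 2 3
      std::cout << '\n';
      return 0;
    }

VerifyDAGDiverence then replays the divergence computation in that order and checks the result against the cached IsDivergent bits.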
-  std::sort(Uses.begin(), Uses.end()); +  llvm::sort(Uses.begin(), Uses.end());    for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();         UseIndex != UseIndexEnd; ) { @@ -7579,6 +8030,10 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {    DbgInfo->add(DB, SD, isParameter);  } +void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { +  DbgInfo->add(DB); +} +  SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,                                                     SDValue NewMemOp) {    assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); @@ -7963,8 +8418,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {    const GlobalValue *GV;    int64_t GVOffset = 0;    if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { -    unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); -    KnownBits Known(PtrWidth); +    unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType()); +    KnownBits Known(IdxWidth);      llvm::computeKnownBits(GV, Known, getDataLayout());      unsigned AlignBits = Known.countMinTrailingZeros();      unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; @@ -8198,7 +8653,7 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {    return true;  } -// \brief Returns the SDNode if it is a constant integer BuildVector +// Returns the SDNode if it is a constant integer BuildVector  // or constant integer.  SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {    if (isa<ConstantSDNode>(N)) @@ -8224,6 +8679,26 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {    return nullptr;  } +void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { +  assert(!Node->OperandList && "Node already has operands"); +  SDUse *Ops = OperandRecycler.allocate( +    ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); + +  bool IsDivergent = false; +  for (unsigned I = 0; I != Vals.size(); ++I) { +    Ops[I].setUser(Node); +    Ops[I].setInitial(Vals[I]); +    if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence. +      IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent(); +  } +  Node->NumOperands = Vals.size(); +  Node->OperandList = Ops; +  IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA); +  if (!TLI->isSDNodeAlwaysUniform(Node)) +    Node->SDNodeBits.IsDivergent = IsDivergent; +  checkForCycles(Node); +} +  #ifndef NDEBUG  static void checkForCyclesHelper(const SDNode *N,                                   SmallPtrSetImpl<const SDNode*> &Visited, diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index da1574f60524..c859f16e74fe 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -99,16 +99,43 @@ BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N,    }    // Consume constant adds & ors with appropriate masking. -  while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) { -    if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) { +  while (true) { +    switch (Base->getOpcode()) { +    case ISD::OR:        // Only consider ORs which act as adds. 
-      if (Base->getOpcode() == ISD::OR && -          !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) -        break; -      Offset += C->getSExtValue(); -      Base = Base->getOperand(0); -      continue; +      if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) +        if (DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) { +          Offset += C->getSExtValue(); +          Base = Base->getOperand(0); +          continue; +        } +      break; +    case ISD::ADD: +      if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) { +        Offset += C->getSExtValue(); +        Base = Base->getOperand(0); +        continue; +      } +      break; +    case ISD::LOAD: +    case ISD::STORE: { +      auto *LSBase = cast<LSBaseSDNode>(Base.getNode()); +      unsigned int IndexResNo = (Base->getOpcode() == ISD::LOAD) ? 1 : 0; +      if (LSBase->isIndexed() && Base.getResNo() == IndexResNo) +        if (auto *C = dyn_cast<ConstantSDNode>(LSBase->getOffset())) { +          auto Off = C->getSExtValue(); +          if (LSBase->getAddressingMode() == ISD::PRE_DEC || +              LSBase->getAddressingMode() == ISD::POST_DEC) +            Offset -= Off; +          else +            Offset += Off; +          Base = LSBase->getBasePtr(); +          continue; +        } +      break; +    }      } +    // If we get here break out of the loop.      break;    } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 68bbd62e1321..1aa8df29af3b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -12,6 +12,7 @@  //===----------------------------------------------------------------------===//  #include "SelectionDAGBuilder.h" +#include "SDNodeDbgValue.h"  #include "llvm/ADT/APFloat.h"  #include "llvm/ADT/APInt.h"  #include "llvm/ADT/ArrayRef.h" @@ -49,7 +50,6 @@  #include "llvm/CodeGen/MachineModuleInfo.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/RuntimeLibcalls.h"  #include "llvm/CodeGen/SelectionDAG.h"  #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -102,6 +102,7 @@  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Target/TargetIntrinsicInfo.h" @@ -777,8 +778,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,      EVT ValueVT = ValueVTs[Value];      unsigned NumRegs = RegCount[Value];      MVT RegisterVT = IsABIMangled -                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) -                         : RegVTs[Value]; +      ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value]) +      : RegVTs[Value];      Parts.resize(NumRegs);      for (unsigned i = 0; i != NumRegs; ++i) { @@ -818,32 +819,15 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,        // FIXME: We capture more information than the dag can represent.  For        // now, just use the tightest assertzext/assertsext possible. 
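One identity worth spelling out from the BaseIndexOffset::match switch above: an ISD::OR is only folded like an add when DAG.MaskedValueIsZero proves the constant's set bits cannot collide with the other operand's bits, because disjoint bits mean no carry and hence (x | c) == x + c. A quick standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      // A 16-byte-aligned base has its low 4 bits known zero, so a small
      // offset constant occupies only bits the base cannot have set.
      uint64_t Base = 0x1000;
      uint64_t C = 0xC;
      assert((Base & C) == 0);        // disjoint bits: no carry possible
      assert((Base | C) == Base + C); // OR acts exactly like ADD
      // With overlapping bits the identity fails, so such ORs are not folded:
      assert((0x6 | 0x3) == 0x7 && (0x6 + 0x3) == 0x9);
      return 0;
    }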
-      bool isSExt = true; +      bool isSExt;        EVT FromVT(MVT::Other); -      if (NumSignBits == RegSize) { -        isSExt = true;   // ASSERT SEXT 1 -        FromVT = MVT::i1; -      } else if (NumZeroBits >= RegSize - 1) { -        isSExt = false;  // ASSERT ZEXT 1 -        FromVT = MVT::i1; -      } else if (NumSignBits > RegSize - 8) { -        isSExt = true;   // ASSERT SEXT 8 -        FromVT = MVT::i8; -      } else if (NumZeroBits >= RegSize - 8) { -        isSExt = false;  // ASSERT ZEXT 8 -        FromVT = MVT::i8; -      } else if (NumSignBits > RegSize - 16) { -        isSExt = true;   // ASSERT SEXT 16 -        FromVT = MVT::i16; -      } else if (NumZeroBits >= RegSize - 16) { -        isSExt = false;  // ASSERT ZEXT 16 -        FromVT = MVT::i16; -      } else if (NumSignBits > RegSize - 32) { -        isSExt = true;   // ASSERT SEXT 32 -        FromVT = MVT::i32; -      } else if (NumZeroBits >= RegSize - 32) { -        isSExt = false;  // ASSERT ZEXT 32 -        FromVT = MVT::i32; +      if (NumZeroBits) { +        FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits); +        isSExt = false; +      } else if (NumSignBits > 1) { +        FromVT = +            EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1); +        isSExt = true;        } else {          continue;        } @@ -876,8 +860,8 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,      unsigned NumParts = RegCount[Value];      MVT RegisterVT = IsABIMangled -                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) -                         : RegVTs[Value]; +      ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value]) +      : RegVTs[Value];      if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))        ExtendKind = ISD::ZERO_EXTEND; @@ -970,6 +954,20 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,    }  } +SmallVector<std::pair<unsigned, unsigned>, 4> +RegsForValue::getRegsAndSizes() const { +  SmallVector<std::pair<unsigned, unsigned>, 4> OutVec; +  unsigned I = 0; +  for (auto CountAndVT : zip_first(RegCount, RegVTs)) { +    unsigned RegCount = std::get<0>(CountAndVT); +    MVT RegisterVT = std::get<1>(CountAndVT); +    unsigned RegisterSize = RegisterVT.getSizeInBits(); +    for (unsigned E = I + RegCount; I != E; ++I) +      OutVec.push_back(std::make_pair(Regs[I], RegisterSize)); +  } +  return OutVec; +} +  void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,                                 const TargetLibraryInfo *li) {    AA = aa; @@ -1054,6 +1052,22 @@ void SelectionDAGBuilder::visit(const Instruction &I) {    visit(I.getOpcode(), I); +  if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) { +    // Propagate the fast-math-flags of this IR instruction to the DAG node that +    // maps to this instruction. +    // TODO: We could handle all flags (nsw, etc) here. +    // TODO: If an IR instruction maps to >1 node, only the final node will have +    //       flags set. 
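The getCopyFromRegs rework above replaces the old i1/i8/i16/i32 assertion ladder with a direct computation: NumZeroBits known-zero high bits mean the register provably holds a zero-extended (RegSize - NumZeroBits)-bit value, and NumSignBits sign-bit copies mean a sign-extended (RegSize - NumSignBits + 1)-bit value. A standalone sketch of the same arithmetic with concrete cases (the helper name is made up):

    #include <cassert>

    // Narrowest width the value provably extends from; 0 means no claim.
    unsigned assertedFromWidth(unsigned RegSize, unsigned NumSignBits,
                               unsigned NumZeroBits, bool &IsSExt) {
      if (NumZeroBits) {
        IsSExt = false; // would become AssertZext
        return RegSize - NumZeroBits;
      }
      if (NumSignBits > 1) {
        IsSExt = true;  // would become AssertSext
        return RegSize - NumSignBits + 1;
      }
      return 0;
    }

    int main() {
      bool IsSExt = false;
      // 32-bit register with 24 known zero high bits: the old ladder said
      // "ASSERT ZEXT 8"; the direct form computes the same i8.
      assert(assertedFromWidth(32, 1, 24, IsSExt) == 8 && !IsSExt);
      // 25 sign-bit copies: the value fits in 32 - 25 + 1 = 8 bits, signed.
      assert(assertedFromWidth(32, 25, 0, IsSExt) == 8 && IsSExt);
      // The new form is also tighter: 5 zero bits yields an exact i27, where
      // the old ladder could only produce a vacuous i32 assertion.
      assert(assertedFromWidth(32, 1, 5, IsSExt) == 27 && !IsSExt);
      return 0;
    }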
+    if (SDNode *Node = getNodeForIRValue(&I)) { +      SDNodeFlags IncomingFlags; +      IncomingFlags.copyFMF(*FPMO); +      if (!Node->getFlags().isDefined()) +        Node->setFlags(IncomingFlags); +      else +        Node->intersectFlagsWith(IncomingFlags); +    } +  } +    if (!isa<TerminatorInst>(&I) && !HasTailCall &&        !isStatepoint(&I)) // statepoints handle their exports internally      CopyToExportRegsIfNeeded(&I); @@ -1077,14 +1091,39 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {    }  } +void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, +                                                const DIExpression *Expr) { +  auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) { +    const DbgValueInst *DI = DDI.getDI(); +    DIVariable *DanglingVariable = DI->getVariable(); +    DIExpression *DanglingExpr = DI->getExpression(); +    if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) { +      LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n"); +      return true; +    } +    return false; +  }; + +  for (auto &DDIMI : DanglingDebugInfoMap) { +    DanglingDebugInfoVector &DDIV = DDIMI.second; +    DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end()); +  } +} +  // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,  // generate the debug data structures now that we've seen its definition.  void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,                                                     SDValue Val) { -  DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; -  if (DDI.getDI()) { +  auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V); +  if (DanglingDbgInfoIt == DanglingDebugInfoMap.end()) +    return; + +  DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second; +  for (auto &DDI : DDIV) {      const DbgValueInst *DI = DDI.getDI(); +    assert(DI && "Ill-formed DanglingDebugInfo");      DebugLoc dl = DDI.getdl(); +    unsigned ValSDNodeOrder = Val.getNode()->getIROrder();      unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();      DILocalVariable *Variable = DI->getVariable();      DIExpression *Expr = DI->getExpression(); @@ -1093,13 +1132,26 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,      SDDbgValue *SDV;      if (Val.getNode()) {        if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) { -        SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder); +        LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order=" +                          << DbgSDNodeOrder << "] for:\n  " << *DI << "\n"); +        LLVM_DEBUG(dbgs() << "  By mapping to:\n    "; Val.dump()); +        // Increase the SDNodeOrder for the DbgValue here to make sure it is +        // inserted after the definition of Val when emitting the instructions +        // after ISel. An alternative could be to teach +        // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly. 
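With DanglingDebugInfoMap now holding a vector of pending dbg.value records per IR value, dropDanglingDebugInfo above walks every bucket and erases the entries whose fragment overlaps the incoming description (the erase/remove_if idiom). A reduced standalone model of that pruning, with plain structs standing in for the DI* metadata classes:

    #include <algorithm>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    // A pending record: which variable, and which bit range [Lo, Hi) of it.
    struct Dangling {
      std::string Var;
      unsigned Lo = 0, Hi = 0;
    };

    using DanglingMap = std::map<const void *, std::vector<Dangling>>;

    // Mirror of dropDanglingDebugInfo: drop every pending record for the
    // same variable whose fragment overlaps the new one.
    void dropOverlapping(DanglingMap &M, const std::string &Var, unsigned Lo,
                         unsigned Hi) {
      for (auto &KV : M) {
        auto &V = KV.second;
        V.erase(std::remove_if(V.begin(), V.end(),
                               [&](const Dangling &D) {
                                 return D.Var == Var && D.Lo < Hi && Lo < D.Hi;
                               }),
                V.end());
      }
    }

    int main() {
      int Key = 0;
      DanglingMap M;
      M[&Key] = {{"x", 0, 32}, {"x", 32, 64}, {"y", 0, 32}};
      dropOverlapping(M, "x", 0, 32); // drops only x's low fragment
      std::printf("%zu records left\n", M[&Key].size()); // 2
      return 0;
    }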
+        LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
+                   << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
+                   << ValSDNodeOrder << "\n");
+        SDV = getDbgValue(Val, Variable, Expr, dl,
+                          std::max(DbgSDNodeOrder, ValSDNodeOrder));
         DAG.AddDbgValue(SDV, Val.getNode(), false);
-      }
+      } else
+        LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
+                          << " in EmitFuncArgumentDbgValue\n");
     } else
-      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
-    DanglingDebugInfoMap[V] = DanglingDebugInfo();
+      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
   }
+  DDIV.clear();
 }
 
 /// getCopyFromRegs - If there was virtual register allocated for the value V
@@ -1315,12 +1367,18 @@ void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
   auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
   bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
   bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
+  bool IsSEH = isAsynchronousEHPersonality(Pers);
+  bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX;
   MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
+  if (!IsSEH)
+    CatchPadMBB->setIsEHScopeEntry();
   // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
   if (IsMSVCCXX || IsCoreCLR)
     CatchPadMBB->setIsEHFuncletEntry();
-
-  DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot()));
+  // Wasm does not need catchpads anymore
+  if (!IsWasmCXX)
+    DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
+                            getControlRoot()));
 }
 
 void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
@@ -1363,7 +1421,8 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
 void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
   // Don't emit any special code for the cleanuppad instruction. It just marks
-  // the start of a funclet.
+  // the start of an EH scope/funclet.
+  FuncInfo.MBB->setIsEHScopeEntry();
   FuncInfo.MBB->setIsEHFuncletEntry();
   FuncInfo.MBB->setIsCleanupFuncletEntry();
 }
@@ -1385,6 +1444,7 @@ static void findUnwindDestinations(
     classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
   bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
   bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+  bool IsSEH = isAsynchronousEHPersonality(Personality);
 
   while (EHPadBB) {
     const Instruction *Pad = EHPadBB->getFirstNonPHI();
@@ -1397,6 +1457,7 @@ static void findUnwindDestinations(
       // Stop on cleanup pads. Cleanups are always funclet entries for all known
       // personalities.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+      UnwindDests.back().first->setIsEHScopeEntry();
       UnwindDests.back().first->setIsEHFuncletEntry();
       break;
     } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
@@ -1406,6 +1467,8 @@ static void findUnwindDestinations(
         // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)            UnwindDests.back().first->setIsEHFuncletEntry(); +        if (!IsSEH) +          UnwindDests.back().first->setIsEHScopeEntry();        }        NewEHPadBB = CatchSwitch->getUnwindDest();      } else { @@ -1653,8 +1716,7 @@ SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,    if (!BPI) {      // If BPI is not available, set the default probability as 1 / N, where N is      // the number of successors. -    auto SuccSize = std::max<uint32_t>( -        std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1); +    auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);      return BranchProbability(1, SuccSize);    }    return BPI->getEdgeProbability(SrcBB, DstBB); @@ -2489,8 +2551,8 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {      assert(CC.Low == CC.High && "Input clusters must be single-case");  #endif -  std::sort(Clusters.begin(), Clusters.end(), -            [](const CaseCluster &a, const CaseCluster &b) { +  llvm::sort(Clusters.begin(), Clusters.end(), +             [](const CaseCluster &a, const CaseCluster &b) {      return a.Low->getValue().slt(b.Low->getValue());    }); @@ -2551,9 +2613,23 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {  }  void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { -  if (DAG.getTarget().Options.TrapUnreachable) -    DAG.setRoot( -        DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); +  if (!DAG.getTarget().Options.TrapUnreachable) +    return; + +  // We may be able to ignore unreachable behind a noreturn call. +  if (DAG.getTarget().Options.NoTrapAfterNoreturn) { +    const BasicBlock &BB = *I.getParent(); +    if (&I != &BB.front()) { +      BasicBlock::const_iterator PredI = +        std::prev(BasicBlock::const_iterator(&I)); +      if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) { +        if (Call->doesNotReturn()) +          return; +      } +    } +  } + +  DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));  }  void SelectionDAGBuilder::visitFSub(const User &I) { @@ -2597,6 +2673,10 @@ static bool isVectorReductionOp(const User *I) {    }    unsigned ElemNum = Inst->getType()->getVectorNumElements(); +  // Ensure the reduction size is a power of 2. +  if (!isPowerOf2_32(ElemNum)) +    return false; +    unsigned ElemNumToReduce = ElemNum;    // Do DFS search on the def-use chain from the given instruction. 
We only @@ -2682,7 +2762,7 @@ static bool isVectorReductionOp(const User *I) {            return false;          const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1)); -        if (!Val || Val->getZExtValue() != 0) +        if (!Val || !Val->isZero())            return false;          ReduxExtracted = true; @@ -2693,45 +2773,23 @@ static bool isVectorReductionOp(const User *I) {    return ReduxExtracted;  } -void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { -  SDValue Op1 = getValue(I.getOperand(0)); -  SDValue Op2 = getValue(I.getOperand(1)); - -  bool nuw = false; -  bool nsw = false; -  bool exact = false; -  bool vec_redux = false; -  FastMathFlags FMF; - -  if (const OverflowingBinaryOperator *OFBinOp = -          dyn_cast<const OverflowingBinaryOperator>(&I)) { -    nuw = OFBinOp->hasNoUnsignedWrap(); -    nsw = OFBinOp->hasNoSignedWrap(); +void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { +  SDNodeFlags Flags; +  if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) { +    Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap()); +    Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap()); +  } +  if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) { +    Flags.setExact(ExactOp->isExact());    } -  if (const PossiblyExactOperator *ExactOp = -          dyn_cast<const PossiblyExactOperator>(&I)) -    exact = ExactOp->isExact(); -  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I)) -    FMF = FPOp->getFastMathFlags(); -    if (isVectorReductionOp(&I)) { -    vec_redux = true; -    DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); +    Flags.setVectorReduction(true); +    LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");    } -  SDNodeFlags Flags; -  Flags.setExact(exact); -  Flags.setNoSignedWrap(nsw); -  Flags.setNoUnsignedWrap(nuw); -  Flags.setVectorReduction(vec_redux); -  Flags.setAllowReciprocal(FMF.allowReciprocal()); -  Flags.setAllowContract(FMF.allowContract()); -  Flags.setNoInfs(FMF.noInfs()); -  Flags.setNoNaNs(FMF.noNaNs()); -  Flags.setNoSignedZeros(FMF.noSignedZeros()); -  Flags.setUnsafeAlgebra(FMF.isFast()); - -  SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), +  SDValue Op1 = getValue(I.getOperand(0)); +  SDValue Op2 = getValue(I.getOperand(1)); +  SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),                                       Op1, Op2, Flags);    setValue(&I, BinNodeValue);  } @@ -2823,13 +2881,12 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {      predicate = FCmpInst::Predicate(FC->getPredicate());    SDValue Op1 = getValue(I.getOperand(0));    SDValue Op2 = getValue(I.getOperand(1)); -  ISD::CondCode Condition = getFCmpCondCode(predicate); -  // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. -  // FIXME: We should propagate the fast-math-flags to the DAG node itself for -  // further optimization, but currently FMF is only applicable to binary nodes. 
-  if (TM.Options.NoNaNsFPMath)
+  ISD::CondCode Condition = getFCmpCondCode(predicate);
+  auto *FPMO = dyn_cast<FPMathOperator>(&I);
+  if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
     Condition = getFCmpCodeWithoutNaN(Condition);
+
   EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                         I.getType());
   setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
@@ -3424,10 +3481,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
                         DAG.getConstant(Offset, dl, N.getValueType()), Flags);
       }
     } else {
-      MVT PtrTy =
-          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS);
-      unsigned PtrSize = PtrTy.getSizeInBits();
-      APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+      unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
+      MVT IdxTy = MVT::getIntegerVT(IdxSize);
+      APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
 
       // If this is a scalar constant or a splat vector of constants,
       // handle it quickly.
@@ -3439,11 +3495,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       if (CI) {
         if (CI->isZero())
           continue;
-        APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
+        APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
         LLVMContext &Context = *DAG.getContext();
         SDValue OffsVal = VectorWidth ?
-          DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) :
-          DAG.getConstant(Offs, dl, PtrTy);
+          DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
+          DAG.getConstant(Offs, dl, IdxTy);
 
         // In an inbounds GEP with an offset that is nonnegative even when
         // interpreted as signed, assume there is no unsigned overflow.
@@ -3867,7 +3923,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
 // extract the splat value and use it as a uniform base.
 // In all other cases the function returns 'false'.
 static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
-                           SelectionDAGBuilder* SDB) {
+                           SDValue &Scale, SelectionDAGBuilder* SDB) {
   SelectionDAG& DAG = SDB->DAG;
   LLVMContext &Context = *DAG.getContext();
@@ -3897,6 +3953,10 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
   if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
     return false;
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const DataLayout &DL = DAG.getDataLayout();
+  Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
+                                SDB->getCurSDLoc(), TLI.getPointerTy(DL));
   Base = SDB->getValue(Ptr);
   Index = SDB->getValue(IndexVal);
 
@@ -3926,8 +3986,9 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
   SDValue Base;
   SDValue Index;
+  SDValue Scale;
   const Value *BasePtr = Ptr;
-  bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
+  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
   const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
 
   MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -3935,10 +3996,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {                           MachineMemOperand::MOStore,  VT.getStoreSize(),                           Alignment, AAInfo);    if (!UniformBase) { -    Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); +    Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));      Index = getValue(Ptr); +    Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));    } -  SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; +  SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };    SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,                                           Ops, MMO);    DAG.setRoot(Scatter); @@ -3997,10 +4059,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {    SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,                                     ISD::NON_EXTLOAD, IsExpanding); -  if (AddToChain) { -    SDValue OutChain = Load.getValue(1); -    DAG.setRoot(OutChain); -  } +  if (AddToChain) +    PendingLoads.push_back(Load.getValue(1));    setValue(&I, Load);  } @@ -4025,8 +4085,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {    SDValue Root = DAG.getRoot();    SDValue Base;    SDValue Index; +  SDValue Scale;    const Value *BasePtr = Ptr; -  bool UniformBase = getUniformBase(BasePtr, Base, Index, this); +  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);    bool ConstantMemory = false;    if (UniformBase &&        AA && AA->pointsToConstantMemory(MemoryLocation( @@ -4044,10 +4105,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {                           Alignment, AAInfo, Ranges);    if (!UniformBase) { -    Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); +    Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));      Index = getValue(Ptr); +    Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));    } -  SDValue Ops[] = { Root, Src0, Mask, Base, Index }; +  SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };    SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,                                         Ops, MMO); @@ -4868,26 +4930,18 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(        const auto &TLI = DAG.getTargetLoweringInfo();        RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,                         V->getType(), isABIRegCopy(V)); -      unsigned NumRegs = -          std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0); -      if (NumRegs > 1) { -        unsigned I = 0; +      if (RFV.occupiesMultipleRegs()) {          unsigned Offset = 0; -        auto RegisterVT = RFV.RegVTs.begin(); -        for (auto RegCount : RFV.RegCount) { -          unsigned RegisterSize = (RegisterVT++)->getSizeInBits(); -          for (unsigned E = I + RegCount; I != E; ++I) { -            // The vregs are guaranteed to be allocated in sequence. 
-            Op = MachineOperand::CreateReg(VMI->second + I, false); -            auto FragmentExpr = DIExpression::createFragmentExpression( -                Expr, Offset, RegisterSize); -            if (!FragmentExpr) -              continue; -            FuncInfo.ArgDbgValues.push_back( -                BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, -                        Op->getReg(), Variable, *FragmentExpr)); -            Offset += RegisterSize; -          } +        for (auto RegAndSize : RFV.getRegsAndSizes()) { +          Op = MachineOperand::CreateReg(RegAndSize.first, false); +          auto FragmentExpr = DIExpression::createFragmentExpression( +              Expr, Offset, RegAndSize.second); +          if (!FragmentExpr) +            continue; +          FuncInfo.ArgDbgValues.push_back( +              BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, +                      Op->getReg(), Variable, *FragmentExpr)); +          Offset += RegAndSize.second;          }          return true;        } @@ -4901,17 +4955,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(    assert(Variable->isValidLocationForIntrinsic(DL) &&           "Expected inlined-at fields to agree"); -  if (Op->isReg()) -    FuncInfo.ArgDbgValues.push_back( -        BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, -                Op->getReg(), Variable, Expr)); -  else -    FuncInfo.ArgDbgValues.push_back( -        BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) -            .add(*Op) -            .addImm(0) -            .addMetadata(Variable) -            .addMetadata(Expr)); +  IsIndirect = (Op->isReg()) ? IsIndirect : true; +  FuncInfo.ArgDbgValues.push_back( +      BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, +              *Op, Variable, Expr));    return true;  } @@ -4924,13 +4971,20 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,                                               unsigned DbgSDNodeOrder) {    if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {      // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe -    // stack slot locations as such instead of as indirectly addressed -    // locations. -    return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl, -                                     DbgSDNodeOrder); +    // stack slot locations.  +    // +    // Consider "int x = 0; int *px = &x;". There are two kinds of interesting +    // debug values here after optimization: +    // +    //   dbg.value(i32* %px, !"int *px", !DIExpression()), and +    //   dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref)) +    // +    // Both describe the direct values of their associated variables. 
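A note on the fragment loop in EmitFuncArgumentDbgValue above: when getRegsAndSizes reports that a value is spread over several registers, each DBG_VALUE covers only a slice of the variable, so the loop threads a running bit offset into DIExpression::createFragmentExpression. The bookkeeping for a 64-bit variable split across two 32-bit vregs looks like this (the register numbers are made up):

    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      // (register, size-in-bits) pairs as getRegsAndSizes() would return.
      std::vector<std::pair<unsigned, unsigned>> RegsAndSizes = {{100, 32},
                                                                 {101, 32}};
      unsigned Offset = 0;
      for (auto RS : RegsAndSizes) {
        // Each DBG_VALUE carries fragment(Offset, Size) so the debugger can
        // stitch the two halves of the variable back together.
        std::printf("vreg %u -> fragment at bit %u, %u bits\n", RS.first,
                    Offset, RS.second);
        Offset += RS.second;
      }
      return 0;
    }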
+    return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), +                                     /*IsIndirect*/ false, dl, DbgSDNodeOrder);    } -  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl, -                         DbgSDNodeOrder); +  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), +                         /*IsIndirect*/ false, dl, DbgSDNodeOrder);  }  // VisualStudio defines setjmp as _setjmp @@ -5000,14 +5054,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {    case Intrinsic::longjmp:      return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];    case Intrinsic::memcpy: { +    const auto &MCI = cast<MemCpyInst>(I);      SDValue Op1 = getValue(I.getArgOperand(0));      SDValue Op2 = getValue(I.getArgOperand(1));      SDValue Op3 = getValue(I.getArgOperand(2)); -    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); -    if (!Align) -      Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. -    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); +    // @llvm.memcpy defines 0 and 1 to both mean no alignment. +    unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1); +    unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1); +    unsigned Align = MinAlign(DstAlign, SrcAlign); +    bool isVol = MCI.isVolatile();      bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); +    // FIXME: Support passing different dest/src alignments to the memcpy DAG +    // node.      SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,                                 false, isTC,                                 MachinePointerInfo(I.getArgOperand(0)), @@ -5016,13 +5074,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      return nullptr;    }    case Intrinsic::memset: { +    const auto &MSI = cast<MemSetInst>(I);      SDValue Op1 = getValue(I.getArgOperand(0));      SDValue Op2 = getValue(I.getArgOperand(1));      SDValue Op3 = getValue(I.getArgOperand(2)); -    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); -    if (!Align) -      Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. -    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); +    // @llvm.memset defines 0 and 1 to both mean no alignment. +    unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1); +    bool isVol = MSI.isVolatile();      bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());      SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,                                 isTC, MachinePointerInfo(I.getArgOperand(0))); @@ -5030,14 +5088,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      return nullptr;    }    case Intrinsic::memmove: { +    const auto &MMI = cast<MemMoveInst>(I);      SDValue Op1 = getValue(I.getArgOperand(0));      SDValue Op2 = getValue(I.getArgOperand(1));      SDValue Op3 = getValue(I.getArgOperand(2)); -    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); -    if (!Align) -      Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. -    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); +    // @llvm.memmove defines 0 and 1 to both mean no alignment. 
+    unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1); +    unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1); +    unsigned Align = MinAlign(DstAlign, SrcAlign); +    bool isVol = MMI.isVolatile();      bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); +    // FIXME: Support passing different dest/src alignments to the memmove DAG +    // node.      SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,                                  isTC, MachinePointerInfo(I.getArgOperand(0)),                                  MachinePointerInfo(I.getArgOperand(1))); @@ -5050,36 +5112,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      SDValue Src = getValue(MI.getRawSource());      SDValue Length = getValue(MI.getLength()); -    // Emit a library call. -    TargetLowering::ArgListTy Args; -    TargetLowering::ArgListEntry Entry; -    Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); -    Entry.Node = Dst; -    Args.push_back(Entry); - -    Entry.Node = Src; -    Args.push_back(Entry); - -    Entry.Ty = MI.getLength()->getType(); -    Entry.Node = Length; -    Args.push_back(Entry); - -    uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); -    RTLIB::Libcall LibraryCall = -        RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); -    if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) -      report_fatal_error("Unsupported element size"); - -    TargetLowering::CallLoweringInfo CLI(DAG); -    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( -        TLI.getLibcallCallingConv(LibraryCall), -        Type::getVoidTy(*DAG.getContext()), -        DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), -                              TLI.getPointerTy(DAG.getDataLayout())), -        std::move(Args)); - -    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); -    DAG.setRoot(CallResult.second); +    unsigned DstAlign = MI.getDestAlignment(); +    unsigned SrcAlign = MI.getSourceAlignment(); +    Type *LengthTy = MI.getLength()->getType(); +    unsigned ElemSz = MI.getElementSizeInBytes(); +    bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); +    SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src, +                                     SrcAlign, Length, LengthTy, ElemSz, isTC, +                                     MachinePointerInfo(MI.getRawDest()), +                                     MachinePointerInfo(MI.getRawSource())); +    updateDAGForMaybeTailCall(MC);      return nullptr;    }    case Intrinsic::memmove_element_unordered_atomic: { @@ -5088,36 +5130,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      SDValue Src = getValue(MI.getRawSource());      SDValue Length = getValue(MI.getLength()); -    // Emit a library call. 
-    TargetLowering::ArgListTy Args; -    TargetLowering::ArgListEntry Entry; -    Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); -    Entry.Node = Dst; -    Args.push_back(Entry); - -    Entry.Node = Src; -    Args.push_back(Entry); - -    Entry.Ty = MI.getLength()->getType(); -    Entry.Node = Length; -    Args.push_back(Entry); - -    uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); -    RTLIB::Libcall LibraryCall = -        RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); -    if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) -      report_fatal_error("Unsupported element size"); - -    TargetLowering::CallLoweringInfo CLI(DAG); -    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( -        TLI.getLibcallCallingConv(LibraryCall), -        Type::getVoidTy(*DAG.getContext()), -        DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), -                              TLI.getPointerTy(DAG.getDataLayout())), -        std::move(Args)); - -    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); -    DAG.setRoot(CallResult.second); +    unsigned DstAlign = MI.getDestAlignment(); +    unsigned SrcAlign = MI.getSourceAlignment(); +    Type *LengthTy = MI.getLength()->getType(); +    unsigned ElemSz = MI.getElementSizeInBytes(); +    bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); +    SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src, +                                      SrcAlign, Length, LengthTy, ElemSz, isTC, +                                      MachinePointerInfo(MI.getRawDest()), +                                      MachinePointerInfo(MI.getRawSource())); +    updateDAGForMaybeTailCall(MC);      return nullptr;    }    case Intrinsic::memset_element_unordered_atomic: { @@ -5126,37 +5148,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      SDValue Val = getValue(MI.getValue());      SDValue Length = getValue(MI.getLength()); -    // Emit a library call. 
-    TargetLowering::ArgListTy Args; -    TargetLowering::ArgListEntry Entry; -    Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); -    Entry.Node = Dst; -    Args.push_back(Entry); - -    Entry.Ty = Type::getInt8Ty(*DAG.getContext()); -    Entry.Node = Val; -    Args.push_back(Entry); - -    Entry.Ty = MI.getLength()->getType(); -    Entry.Node = Length; -    Args.push_back(Entry); - -    uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); -    RTLIB::Libcall LibraryCall = -        RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); -    if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) -      report_fatal_error("Unsupported element size"); - -    TargetLowering::CallLoweringInfo CLI(DAG); -    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( -        TLI.getLibcallCallingConv(LibraryCall), -        Type::getVoidTy(*DAG.getContext()), -        DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), -                              TLI.getPointerTy(DAG.getDataLayout())), -        std::move(Args)); - -    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); -    DAG.setRoot(CallResult.second); +    unsigned DstAlign = MI.getDestAlignment(); +    Type *LengthTy = MI.getLength()->getType(); +    unsigned ElemSz = MI.getElementSizeInBytes(); +    bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); +    SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length, +                                     LengthTy, ElemSz, isTC, +                                     MachinePointerInfo(MI.getRawDest())); +    updateDAGForMaybeTailCall(MC);      return nullptr;    }    case Intrinsic::dbg_addr: @@ -5164,13 +5163,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I);      DILocalVariable *Variable = DI.getVariable();      DIExpression *Expression = DI.getExpression(); +    dropDanglingDebugInfo(Variable, Expression);      assert(Variable && "Missing variable");      // Check if address has undef value.      const Value *Address = DI.getVariableLocation();      if (!Address || isa<UndefValue>(Address) ||          (Address->use_empty() && !isa<Argument>(Address))) { -      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); +      LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");        return nullptr;      } @@ -5195,10 +5195,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {      // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in      // the MachineFunction variable table.      if (FI != std::numeric_limits<int>::max()) { -      if (Intrinsic == Intrinsic::dbg_addr) -        DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl, -                                                  SDNodeOrder), -                        getRoot().getNode(), isParameter); +      if (Intrinsic == Intrinsic::dbg_addr) { +        SDDbgValue *SDV = DAG.getFrameIndexDbgValue( +            Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder); +        DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter); +      }        return nullptr;      } @@ -5214,8 +5215,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {        auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());        if (isParameter && FINode) {          // Byval parameter. We have a frame index at this point. 
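All three element-atomic intrinsic cases above now delegate to a DAG.getAtomicMem* helper and hand the returned chain to updateDAGForMaybeTailCall. A reduced model of the decision that helper has to make (simplified types; behavior as suggested by the isTailCall plumbing here, not a copy of the real member function):

    #include <cstdio>

    struct Builder {
      const void *Root = nullptr;
      bool HasTailCall = false;

      // A null chain means the call was emitted as a tail call and the block
      // is finished; otherwise the chain becomes the new DAG root.
      void updateForMaybeTailCall(const void *MaybeTC) {
        if (MaybeTC)
          Root = MaybeTC;
        else
          HasTailCall = true;
      }
    };

    int main() {
      Builder B;
      int Chain = 0;
      B.updateForMaybeTailCall(&Chain); // normal call: thread the chain
      std::printf("root set: %d, tail call: %d\n", B.Root != nullptr,
                  B.HasTailCall);
      return 0;
    }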
-        SDV = DAG.getFrameIndexDbgValue(Variable, Expression,
-                                        FINode->getIndex(), dl, SDNodeOrder);
+        SDV =
+            DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
+                                      /*IsIndirect*/ true, dl, SDNodeOrder);
       } else if (isa<Argument>(Address)) {
         // Address is an argument, so try to emit its dbg value using
         // virtual register info from the FuncInfo.ValueMap.
@@ -5231,17 +5233,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       // virtual register info from the FuncInfo.ValueMap.
       if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
                                     N)) {
-        DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+        LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
       }
     }
     return nullptr;
   }
+  case Intrinsic::dbg_label: {
+    const DbgLabelInst &DI = cast<DbgLabelInst>(I);
+    DILabel *Label = DI.getLabel();
+    assert(Label && "Missing label");
+
+    SDDbgLabel *SDV;
+    SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
+    DAG.AddDbgLabel(SDV);
+    return nullptr;
+  }
   case Intrinsic::dbg_value: {
     const DbgValueInst &DI = cast<DbgValueInst>(I);
     assert(DI.getVariable() && "Missing variable");
 
     DILocalVariable *Variable = DI.getVariable();
     DIExpression *Expression = DI.getExpression();
+    dropDanglingDebugInfo(Variable, Expression);
     const Value *V = DI.getValue();
     if (!V)
       return nullptr;
@@ -5266,16 +5279,64 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       return nullptr;
     }
 
+    // PHI nodes have already been selected, so we should already know which
+    // VReg they are assigned to.
+    if (isa<PHINode>(V)) {
+      auto VMI = FuncInfo.ValueMap.find(V);
+      if (VMI != FuncInfo.ValueMap.end()) {
+        unsigned Reg = VMI->second;
+        // The PHI node may be split up into several MI PHI nodes (in
+        // FunctionLoweringInfo::set).
+        RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+                         V->getType(), false);
+        if (RFV.occupiesMultipleRegs()) {
+          unsigned Offset = 0;
+          unsigned BitsToDescribe = 0;
+          if (auto VarSize = Variable->getSizeInBits())
+            BitsToDescribe = *VarSize;
+          if (auto Fragment = Expression->getFragmentInfo())
+            BitsToDescribe = Fragment->SizeInBits;
+          for (auto RegAndSize : RFV.getRegsAndSizes()) {
+            unsigned RegisterSize = RegAndSize.second;
+            // Bail out if all bits are described already.
+            if (Offset >= BitsToDescribe)
+              break;
+            unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
+                ? BitsToDescribe - Offset
+                : RegisterSize;
+            auto FragmentExpr = DIExpression::createFragmentExpression(
+                Expression, Offset, FragmentSize);
+            if (!FragmentExpr)
+              continue;
+            SDV = DAG.getVRegDbgValue(Variable, *FragmentExpr, RegAndSize.first,
+                                      false, dl, SDNodeOrder);
+            DAG.AddDbgValue(SDV, nullptr, false);
+            Offset += RegisterSize;
+          }
+        } else {
+          SDV = DAG.getVRegDbgValue(Variable, Expression, Reg, false, dl,
+                                    SDNodeOrder);
+          DAG.AddDbgValue(SDV, nullptr, false);
+        }
+        return nullptr;
+      }
+    }
+
+    // TODO: When we get here we will either drop the dbg.value completely, or
+    // we try to move it forward by letting it dangle for a while. So we should
+    // probably add an extra DbgValue to the DAG here, with a reference to
+    // "noreg", to indicate that we have lost the debug location for the
+    // variable.
+
     if (!V->use_empty()) {
       // Do not call getValue(V) yet, as we don't want to generate code.
       // Remember it for later.
-      DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
-      DanglingDebugInfoMap[V] = DDI;
+      DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
       return nullptr;
     }
 
-    DEBUG(dbgs() << "Dropping debug location info for:\n  " << DI << "\n");
-    DEBUG(dbgs() << "  Last seen at:\n    " << *V << "\n");
+    LLVM_DEBUG(dbgs() << "Dropping debug location info for:\n  " << DI << "\n");
+    LLVM_DEBUG(dbgs() << "  Last seen at:\n    " << *V << "\n");
     return nullptr;
   }
 
@@ -5609,6 +5670,52 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
     return nullptr;
   }
+  case Intrinsic::fshl:
+  case Intrinsic::fshr: {
+    bool IsFSHL = Intrinsic == Intrinsic::fshl;
+    SDValue X = getValue(I.getArgOperand(0));
+    SDValue Y = getValue(I.getArgOperand(1));
+    SDValue Z = getValue(I.getArgOperand(2));
+    EVT VT = X.getValueType();
+
+    // When X == Y, this is a rotate. Create the node directly if legal.
+    // TODO: This should also be done if the operation is custom, but we have
+    // to make sure targets are handling the modulo shift amount as expected.
+    // TODO: If the rotate direction (left or right) corresponding to the shift
+    // is not available, adjust the shift value and invert the direction.
+    auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
+    if (X == Y && TLI.isOperationLegal(RotateOpcode, VT)) {
+      setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+      return nullptr;
+    }
+
+    // Get the shift amount and inverse shift amount, modulo the bit-width.
+    SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
+    SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
+    SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, Z);
+    SDValue InvShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
+
+    // fshl: (X << (Z % BW)) | (Y >> ((BW - Z) % BW))
+    // fshr: (X << ((BW - Z) % BW)) | (Y >> (Z % BW))
+    SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt);
+    SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt);
@@ -5609,6 +5670,52 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
     return nullptr;
   }
+  case Intrinsic::fshl:
+  case Intrinsic::fshr: {
+    bool IsFSHL = Intrinsic == Intrinsic::fshl;
+    SDValue X = getValue(I.getArgOperand(0));
+    SDValue Y = getValue(I.getArgOperand(1));
+    SDValue Z = getValue(I.getArgOperand(2));
+    EVT VT = X.getValueType();
+
+    // When X == Y, this is rotate. Create the node directly if legal.
+    // TODO: This should also be done if the operation is custom, but we have
+    // to make sure targets are handling the modulo shift amount as expected.
+    // TODO: If the rotate direction (left or right) corresponding to the shift
+    // is not available, adjust the shift value and invert the direction.
+    auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
+    if (X == Y && TLI.isOperationLegal(RotateOpcode, VT)) {
+      setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+      return nullptr;
+    }
+
+    // Get the shift amount and inverse shift amount, modulo the bit-width.
+    SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
+    SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
+    SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, Z);
+    SDValue InvShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
+
+    // fshl: (X << (Z % BW)) | (Y >> ((BW - Z) % BW))
+    // fshr: (X << ((BW - Z) % BW)) | (Y >> (Z % BW))
+    SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt);
+    SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt);
+    SDValue Res = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
+
+    // If (Z % BW == 0), then (BW - Z) % BW is also zero, so the result would
+    // be X | Y. If X == Y (rotate), that's fine. If not, we have to select.
+    if (X != Y) {
+      SDValue Zero = DAG.getConstant(0, sdl, VT);
+      EVT CCVT = MVT::i1;
+      if (VT.isVector())
+        CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
+      // For fshl, 0 shift returns the 1st arg (X).
+      // For fshr, 0 shift returns the 2nd arg (Y).
+      SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
+      Res = DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Res);
+    }
+    setValue(&I, Res);
+    return nullptr;
+  }
   case Intrinsic::stacksave: {
     SDValue Op = getRoot();
     Res = DAG.getNode(
@@ -5703,7 +5810,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   }
   case Intrinsic::annotation:
   case Intrinsic::ptr_annotation:
-  case Intrinsic::invariant_group_barrier:
+  case Intrinsic::launder_invariant_group:
+  case Intrinsic::strip_invariant_group:
     // Drop the intrinsic, but forward the value
     setValue(&I, getValue(I.getOperand(0)));
     return nullptr;
@@ -5822,17 +5930,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDValue Ops[5];
     unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
     auto Flags = rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
-    Ops[0] = getRoot();
+    Ops[0] = DAG.getRoot();
     Ops[1] = getValue(I.getArgOperand(0));
     Ops[2] = getValue(I.getArgOperand(1));
     Ops[3] = getValue(I.getArgOperand(2));
     Ops[4] = getValue(I.getArgOperand(3));
-    DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
-                                        DAG.getVTList(MVT::Other), Ops,
-                                        EVT::getIntegerVT(*Context, 8),
-                                        MachinePointerInfo(I.getArgOperand(0)),
-                                        0, /* align */
-                                        Flags));
+    SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
+                                             DAG.getVTList(MVT::Other), Ops,
+                                             EVT::getIntegerVT(*Context, 8),
+                                             MachinePointerInfo(I.getArgOperand(0)),
+                                             0, /* align */
+                                             Flags);
+
+    // Chain the prefetch in parallel with any pending loads, to stay out of
+    // the way of later optimizations.
+    PendingLoads.push_back(Result);
+    Result = getRoot();
+    DAG.setRoot(Result);
     return nullptr;
   }
   case Intrinsic::lifetime_start:
@@ -6004,6 +6118,41 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     setValue(&I, patchableNode);
     return nullptr;
   }
+  case Intrinsic::xray_typedevent: {
+    // Here we want to make sure that the intrinsic behaves as if it has a
+    // specific calling convention, and only for x86_64.
+    // FIXME: Support other platforms later.
+    const auto &Triple = DAG.getTarget().getTargetTriple();
+    if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+      return nullptr;
+
+    SDLoc DL = getCurSDLoc();
+    SmallVector<SDValue, 8> Ops;
+
+    // We want to say that we always want the arguments in registers.
+    // It's unclear to me how manipulating the selection DAG here forces callers +    // to provide arguments in registers instead of on the stack. +    SDValue LogTypeId = getValue(I.getArgOperand(0)); +    SDValue LogEntryVal = getValue(I.getArgOperand(1)); +    SDValue StrSizeVal = getValue(I.getArgOperand(2)); +    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); +    SDValue Chain = getRoot(); +    Ops.push_back(LogTypeId); +    Ops.push_back(LogEntryVal); +    Ops.push_back(StrSizeVal); +    Ops.push_back(Chain); + +    // We need to enforce the calling convention for the callsite, so that +    // argument ordering is enforced correctly, and that register allocation can +    // see that some registers may be assumed clobbered and have to preserve +    // them across calls to the intrinsic. +    MachineSDNode *MN = DAG.getMachineNode( +        TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops); +    SDValue patchableNode = SDValue(MN, 0); +    DAG.setRoot(patchableNode); +    setValue(&I, patchableNode); +    return nullptr; +  }    case Intrinsic::experimental_deoptimize:      LowerDeoptimizeCall(&I);      return nullptr; @@ -6023,6 +6172,66 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {    case Intrinsic::experimental_vector_reduce_fmin:      visitVectorReduce(I, Intrinsic);      return nullptr; + +  case Intrinsic::icall_branch_funnel: { +    SmallVector<SDValue, 16> Ops; +    Ops.push_back(DAG.getRoot()); +    Ops.push_back(getValue(I.getArgOperand(0))); + +    int64_t Offset; +    auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( +        I.getArgOperand(1), Offset, DAG.getDataLayout())); +    if (!Base) +      report_fatal_error( +          "llvm.icall.branch.funnel operand must be a GlobalValue"); +    Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0)); + +    struct BranchFunnelTarget { +      int64_t Offset; +      SDValue Target; +    }; +    SmallVector<BranchFunnelTarget, 8> Targets; + +    for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) { +      auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( +          I.getArgOperand(Op), Offset, DAG.getDataLayout())); +      if (ElemBase != Base) +        report_fatal_error("all llvm.icall.branch.funnel operands must refer " +                           "to the same GlobalValue"); + +      SDValue Val = getValue(I.getArgOperand(Op + 1)); +      auto *GA = dyn_cast<GlobalAddressSDNode>(Val); +      if (!GA) +        report_fatal_error( +            "llvm.icall.branch.funnel operand must be a GlobalValue"); +      Targets.push_back({Offset, DAG.getTargetGlobalAddress( +                                     GA->getGlobal(), getCurSDLoc(), +                                     Val.getValueType(), GA->getOffset())}); +    } +    llvm::sort(Targets.begin(), Targets.end(), +               [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) { +                 return T1.Offset < T2.Offset; +               }); + +    for (auto &T : Targets) { +      Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32)); +      Ops.push_back(T.Target); +    } + +    SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, +                                 getCurSDLoc(), MVT::Other, Ops), +              0); +    DAG.setRoot(N); +    setValue(&I, N); +    HasTailCall = true; +    return nullptr; +  } + +  case Intrinsic::wasm_landingpad_index: { +    // TODO store landing pad index in a 
map, which will be used when generating
+    // LSDA information.
+    return nullptr;
+  }
   }
 }
@@ -6172,7 +6381,10 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
     DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));

     // Inform MachineModuleInfo of range.
-    if (MF.hasEHFunclets()) {
+    auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+    // There is a platform (e.g. wasm) that uses funclet style IR but does not
+    // actually use outlined funclets and their LSDA info style.
+    if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
       assert(CLI.CS);
       WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
       EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
@@ -6630,14 +6842,13 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
   const char *RenameFn = nullptr;
   if (Function *F = I.getCalledFunction()) {
     if (F->isDeclaration()) {
-      if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
-        if (unsigned IID = II->getIntrinsicID(F)) {
-          RenameFn = visitIntrinsicCall(I, IID);
-          if (!RenameFn)
-            return;
-        }
-      }
-      if (Intrinsic::ID IID = F->getIntrinsicID()) {
+      // Is this an LLVM intrinsic or a target-specific intrinsic?
+      unsigned IID = F->getIntrinsicID();
+      if (!IID)
+        if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
+          IID = II->getIntrinsicID(F);
+
+      if (IID) {
         RenameFn = visitIntrinsicCall(I, IID);
         if (!RenameFn)
           return;
@@ -6989,27 +7200,37 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
   unsigned NumRegs = 1;
   if (OpInfo.ConstraintVT != MVT::Other) {
-    // If this is a FP input in an integer register (or visa versa) insert a bit
-    // cast of the input value.  More generally, handle any case where the input
-    // value disagrees with the register class we plan to stick this in.
-    if (OpInfo.Type == InlineAsm::isInput && PhysReg.second &&
+    // If this is a FP operand in an integer register (or vice versa), or more
+    // generally if the operand value disagrees with the register class we plan
+    // to stick it in, fix the operand type.
+    //
+    // If this is an input value, the bitcast to the new type is done now.
+    // Bitcast for output value is done at the end of visitInlineAsm().
+    if ((OpInfo.Type == InlineAsm::isOutput ||
+         OpInfo.Type == InlineAsm::isInput) &&
+        PhysReg.second &&
         !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) {
       // Try to convert to the first EVT that the reg class contains.  If the
       // types are identical size, use a bitcast to convert (e.g. two differing
-      // vector types).
+      // vector types).  Note: output bitcast is done at the end of
+      // visitInlineAsm().
       MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second);
-      if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) {
-        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
-                                         RegVT, OpInfo.CallOperand);
+      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+        // Exclude indirect inputs while they are unsupported because the code
+        // to perform the load is missing and thus OpInfo.CallOperand still
+        // refers to the input address rather than the pointed-to value.
+        if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
+          OpInfo.CallOperand =
+              DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
         OpInfo.ConstraintVT = RegVT;
+        // If the operand is a FP value and we want it in integer registers,
+        // use the corresponding integer type. This turns an f64 value into
+        // i64, which can be passed with two i32 values on a 32-bit machine.
       } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
-        // If the input is a FP value and we want it in FP registers, do a
-        // bitcast to the corresponding integer type.  This turns an f64 value
-        // into i64, which can be passed with two i32 values on a 32-bit
-        // machine.
         RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
-        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
-                                         RegVT, OpInfo.CallOperand);
+        if (OpInfo.Type == InlineAsm::isInput)
+          OpInfo.CallOperand =
+              DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
         OpInfo.ConstraintVT = RegVT;
       }
     }
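The operand-type fixups in the hunk just above follow a simple rule: if the chosen register class has a legal type of the same bit width, a plain bitcast suffices; an FP operand headed for integer registers is instead retyped to the equally-sized integer type. A minimal sketch of that decision, as a hypothetical helper (not in the patch), assuming only LLVM's MVT from llvm/Support/MachineValueType.h:

#include "llvm/Support/MachineValueType.h"
using llvm::MVT;

// Hypothetical helper mirroring the rule above: given the register class's
// legal type RegVT, decide which type the inline-asm operand should take.
static MVT pickOperandType(MVT RegVT, MVT ConstraintVT) {
  if (RegVT.getSizeInBits() == ConstraintVT.getSizeInBits())
    return RegVT; // same size: a BITCAST converts the value directly
  if (RegVT.isInteger() && ConstraintVT.isFloatingPoint())
    // e.g. an f64 constrained to integer registers on a 32-bit target
    // becomes i64, which can then be split into two i32 halves.
    return MVT::getIntegerVT(ConstraintVT.getSizeInBits());
  return ConstraintVT; // otherwise leave the operand's type unchanged
}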
@@ -7246,7 +7467,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
       continue;

     // If this is a memory input, and if the operand is not indirect, do what we
-    // need to to provide an address for the memory input.
+    // need to provide an address for the memory input.
     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
         !OpInfo.isIndirect) {
       assert((OpInfo.isMultipleAlternative ||
@@ -7521,12 +7742,18 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
     if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
       EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType());

-      // If any of the results of the inline asm is a vector, it may have the
-      // wrong width/num elts.  This can happen for register classes that can
-      // contain multiple different value types.  The preg or vreg allocated may
-      // not have the same VT as was expected.  Convert it to the right type
-      // with bit_convert.
-      if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+      // If the type of the inline asm call site return value is different
+      // from, but has the same size as, the type of the asm output, bitcast
+      // it.  One example of this is for vectors with different width / number
+      // of elements.  This can happen for register classes that can contain
+      // multiple different value types.  The preg or vreg allocated may not
+      // have the same VT as was expected.
+      //
+      // This can also happen for a return value that disagrees with the
+      // register class it is put in, e.g. a double in a general-purpose
+      // register on a 32-bit machine.
+      if (ResultType != Val.getValueType() &&
+          ResultType.getSizeInBits() == Val.getValueSizeInBits()) {
         Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(),
                           ResultType, Val);
@@ -7581,8 +7808,17 @@ void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
   // Make sure we leave the DAG in a valid state
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType());
-  setValue(CS.getInstruction(), DAG.getUNDEF(VT));
+  SmallVector<EVT, 1> ValueVTs;
+  ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
+
+  if (ValueVTs.empty())
+    return;
+
+  SmallVector<SDValue, 1> Ops;
+  for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
+    Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
+
+  setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc()));
 }

 void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
@@ -7656,7 +7892,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
   return DAG.getMergeValues(Ops, SL);
 }

-/// \brief Populate a CallLowerinInfo (into \p CLI) based on the properties of
+/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
 /// the call being lowered.
 ///
 /// This is a helper for lowering intrinsics that follow a target calling
@@ -7680,7 +7916,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
     TargetLowering::ArgListEntry Entry;
     Entry.Node = getValue(V);
     Entry.Ty = V->getType();
-    Entry.setAttributes(&CS, ArgIdx);
+    Entry.setAttributes(&CS, ArgI);
     Args.push_back(Entry);
   }

@@ -7691,7 +7927,7 @@
       .setIsPatchPoint(IsPatchPoint);
 }

-/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
+/// Add a stack map intrinsic call's live variable operands to a stackmap
 /// or patchpoint target node's operand list.
 ///
 /// Constants are converted to TargetConstants purely as an optimization to
@@ -7727,7 +7963,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
   }
 }

-/// \brief Lower llvm.experimental.stackmap directly to its target opcode.
+/// Lower llvm.experimental.stackmap directly to its target opcode.
 void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
   // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
   //                                  [live variables...])
@@ -7790,7 +8026,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
   FuncInfo.MF->getFrameInfo().setHasStackMap();
 }

-/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
+/// Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,                                            const BasicBlock *EHPadBB) {    // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, @@ -7954,8 +8190,6 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,    FastMathFlags FMF;    if (isa<FPMathOperator>(I))      FMF = I.getFastMathFlags(); -  SDNodeFlags SDFlags; -  SDFlags.setNoNaNs(FMF.noNaNs());    switch (Intrinsic) {    case Intrinsic::experimental_vector_reduce_fadd: @@ -7998,10 +8232,10 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,      Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);      break;    case Intrinsic::experimental_vector_reduce_fmax: -    Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); +    Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1);      break;    case Intrinsic::experimental_vector_reduce_fmin: -    Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); +    Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1);      break;    default:      llvm_unreachable("Unhandled vector reduce intrinsic"); @@ -8220,8 +8454,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {        else if (Args[i].IsZExt)          ExtendKind = ISD::ZERO_EXTEND; -      // Conservatively only handle 'returned' on non-vectors for now -      if (Args[i].IsReturned && !Op.getValueType().isVector()) { +      // Conservatively only handle 'returned' on non-vectors that can be lowered, +      // for now. +      if (Args[i].IsReturned && !Op.getValueType().isVector() && +          CanLowerReturn) {          assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&                 "unexpected use of 'returned'");          // Before passing 'returned' to the target lowering code, ensure that @@ -8500,7 +8736,8 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,        continue;      } -    DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n'); +    LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI +                      << '\n');      // Mark this alloca and store for argument copy elision.      *Info = StaticAllocaInfo::Elidable; @@ -8541,8 +8778,9 @@ static void tryToElideArgumentCopy(    int OldIndex = AllocaIndex;    MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();    if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) { -    DEBUG(dbgs() << "  argument copy elision failed due to bad fixed stack " -                    "object size\n"); +    LLVM_DEBUG( +        dbgs() << "  argument copy elision failed due to bad fixed stack " +                  "object size\n");      return;    }    unsigned RequiredAlignment = AI->getAlignment(); @@ -8551,16 +8789,16 @@ static void tryToElideArgumentCopy(          AI->getAllocatedType());    }    if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { -    DEBUG(dbgs() << "  argument copy elision failed: alignment of alloca " -                    "greater than stack argument alignment (" -                 << RequiredAlignment << " vs " -                 << MFI.getObjectAlignment(FixedIndex) << ")\n"); +    LLVM_DEBUG(dbgs() << "  argument copy elision failed: alignment of alloca " +                         "greater than stack argument alignment (" +                      << RequiredAlignment << " vs " +                      << MFI.getObjectAlignment(FixedIndex) << ")\n");      return;    }    // Perform the elision. 
Delete the old stack object and replace its only use    // in the variable info map. Mark the stack object as mutable. -  DEBUG({ +  LLVM_DEBUG({      dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'             << "  Replacing frame index " << OldIndex << " with " << FixedIndex             << '\n'; @@ -8732,14 +8970,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {           "LowerFormalArguments didn't return a valid chain!");    assert(InVals.size() == Ins.size() &&           "LowerFormalArguments didn't emit the correct number of values!"); -  DEBUG({ -      for (unsigned i = 0, e = Ins.size(); i != e; ++i) { -        assert(InVals[i].getNode() && -               "LowerFormalArguments emitted a null value!"); -        assert(EVT(Ins[i].VT) == InVals[i].getValueType() && -               "LowerFormalArguments emitted a value with the wrong type!"); -      } -    }); +  LLVM_DEBUG({ +    for (unsigned i = 0, e = Ins.size(); i != e; ++i) { +      assert(InVals[i].getNode() && +             "LowerFormalArguments emitted a null value!"); +      assert(EVT(Ins[i].VT) == InVals[i].getValueType() && +             "LowerFormalArguments emitted a value with the wrong type!"); +    } +  });    // Update the DAG with the new chain value resulting from argument lowering.    DAG.setRoot(NewRoot); @@ -9351,7 +9589,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,    }    BitTestInfo BTI; -  std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { +  llvm::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {      // Sort by probability first, number of bits second, bit mask third.      if (a.ExtraProb != b.ExtraProb)        return a.ExtraProb > b.ExtraProb; @@ -9550,15 +9788,15 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,      // checked first. However, two clusters can have the same probability in      // which case their relative ordering is non-deterministic. So we use Low      // as a tie-breaker as clusters are guaranteed to never overlap. -    std::sort(W.FirstCluster, W.LastCluster + 1, -              [](const CaseCluster &a, const CaseCluster &b) { +    llvm::sort(W.FirstCluster, W.LastCluster + 1, +               [](const CaseCluster &a, const CaseCluster &b) {        return a.Prob != b.Prob ?               a.Prob > b.Prob :               a.Low->getValue().slt(b.Low->getValue());      });      // Rearrange the case blocks so that the last one falls through if possible -    // without without changing the order of probabilities. +    // without changing the order of probabilities.      for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {        --I;        if (I->Prob > W.LastCluster->Prob) @@ -9883,8 +10121,8 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(    if (!SwitchPeeled)      return SwitchMBB; -  DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb -               << "\n"); +  LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " +                    << TopCaseProb << "\n");    // Record the MBB for the peeled switch statement.    
MachineFunction::iterator BBI(SwitchMBB);
@@ -9901,10 +10139,11 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
   Clusters.erase(PeeledCaseIt);
   for (CaseCluster &CC : Clusters) {
-    DEBUG(dbgs() << "Scale the probablity for one cluster, before scaling: "
-                 << CC.Prob << "\n");
+    LLVM_DEBUG(
+        dbgs() << "Scale the probability for one cluster, before scaling: "
+               << CC.Prob << "\n");
     CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
-    DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
+    LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
   }
   PeeledCaseProb = TopCaseProb;
   return PeeledSwitchMBB;
@@ -9983,11 +10222,13 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
   findJumpTables(Clusters, &SI, DefaultMBB);
   findBitTestClusters(Clusters, &SI);

-  DEBUG({
+  LLVM_DEBUG({
     dbgs() << "Case clusters: ";
     for (const CaseCluster &C : Clusters) {
-      if (C.Kind == CC_JumpTable) dbgs() << "JT:";
-      if (C.Kind == CC_BitTests) dbgs() << "BT:";
+      if (C.Kind == CC_JumpTable)
+        dbgs() << "JT:";
+      if (C.Kind == CC_BitTests)
+        dbgs() << "BT:";
       C.Low->getValue().print(dbgs(), true);
       if (C.Low != C.High) {
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 9e7c2bc6821b..e421984b8af2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -21,7 +21,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -33,6 +32,7 @@
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -116,9 +116,12 @@ class SelectionDAGBuilder {
     unsigned getSDNodeOrder() { return SDNodeOrder; }
   };

+  /// DanglingDebugInfoVector - Helper type for DanglingDebugInfoMap.
+  typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector;
+
   /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
   /// yet seen the referent.  We defer handling these until we do see it.
-  DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+  DenseMap<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;

 public:
   /// PendingLoads - Loads are not emitted to the program immediately.  We bunch
@@ -671,6 +674,12 @@ public:
   /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
   SDValue getCopyFromRegs(const Value *V, Type *Ty);

+  /// If we have dangling debug info that describes \p Variable, or an
+  /// overlapping part of a variable considering the \p Expr, then this method
+  /// will drop that debug info as it isn't valid any longer.
+  void dropDanglingDebugInfo(const DILocalVariable *Variable,
+                             const DIExpression *Expr);
+
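Since DanglingDebugInfoMap now stores a vector of pending records per value, dropDanglingDebugInfo has to walk those vectors and discard entries that describe (a part of) the given variable. A simplified, self-contained model of that bookkeeping (illustrative only; plain std types stand in for the LLVM ones, and fragment overlap via the expression is ignored here):

#include <algorithm>
#include <map>
#include <string>
#include <vector>

// Stand-ins: the string plays the role of DILocalVariable*, the unsigned
// the SDNodeOrder recorded when the dbg.value was first seen.
struct PendingDbgValue {
  std::string Variable;
  unsigned Order;
};
using DanglingMap = std::map<const void *, std::vector<PendingDbgValue>>;

// Mirror of dropDanglingDebugInfo: once Var is redefined, every pending
// record describing it is stale and must not be emitted later.
static void dropDangling(DanglingMap &M, const std::string &Var) {
  for (auto &KV : M) {
    auto &Vec = KV.second;
    Vec.erase(std::remove_if(Vec.begin(), Vec.end(),
                             [&](const PendingDbgValue &P) {
                               return P.Variable == Var;
                             }),
              Vec.end());
  }
}

   // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
   // generate the debug data structures now that we've seen its definition.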
void resolveDanglingDebugInfo(const Value *V, SDValue Val); @@ -678,6 +687,13 @@ public:    SDValue getValue(const Value *V);    bool findValue(const Value *V) const; +  /// Return the SDNode for the specified IR value if it exists. +  SDNode *getNodeForIRValue(const Value *V) { +    if (NodeMap.find(V) == NodeMap.end()) +      return nullptr; +    return NodeMap[V].getNode(); +  } +    SDValue getNonRegisterValue(const Value *V);    SDValue getValueImpl(const Value *V); @@ -696,13 +712,13 @@ public:    void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,                              MachineBasicBlock *FBB, MachineBasicBlock *CurBB,                              MachineBasicBlock *SwitchBB, -                            Instruction::BinaryOps Opc, BranchProbability TW, -                            BranchProbability FW, bool InvertCond); +                            Instruction::BinaryOps Opc, BranchProbability TProb, +                            BranchProbability FProb, bool InvertCond);    void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,                                      MachineBasicBlock *FBB,                                      MachineBasicBlock *CurBB,                                      MachineBasicBlock *SwitchBB, -                                    BranchProbability TW, BranchProbability FW, +                                    BranchProbability TProb, BranchProbability FProb,                                      bool InvertCond);    bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);    bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); @@ -774,11 +790,11 @@ public:    };    /// Lower \p SLI into a STATEPOINT instruction. -  SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SLI); +  SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SI);    // This function is responsible for the whole statepoint lowering process.    // It uniformly handles invoke and call statepoints. -  void LowerStatepoint(ImmutableStatepoint Statepoint, +  void LowerStatepoint(ImmutableStatepoint ISP,                         const BasicBlock *EHPadBB = nullptr);    void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee, @@ -838,7 +854,7 @@ private:    void visitInvoke(const InvokeInst &I);    void visitResume(const ResumeInst &I); -  void visitBinary(const User &I, unsigned OpCode); +  void visitBinary(const User &I, unsigned Opcode);    void visitShift(const User &I, unsigned Opcode);    void visitAdd(const User &I)  { visitBinary(I, ISD::ADD); }    void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } @@ -881,7 +897,7 @@ private:    void visitExtractValue(const User &I);    void visitInsertValue(const User &I); -  void visitLandingPad(const LandingPadInst &I); +  void visitLandingPad(const LandingPadInst &LP);    void visitGetElementPtr(const User &I);    void visitSelect(const User &I); @@ -926,7 +942,7 @@ private:                         const BasicBlock *EHPadBB = nullptr);    // These two are implemented in StatepointLowering.cpp -  void visitGCRelocate(const GCRelocateInst &I); +  void visitGCRelocate(const GCRelocateInst &Relocate);    void visitGCResult(const GCResultInst &I);    void visitVectorReduce(const CallInst &I, unsigned Intrinsic); @@ -1036,9 +1052,17 @@ struct RegsForValue {    /// Add this value to the specified inlineasm node operand list. This adds the    /// code marker, matching input operand index (if applicable), and includes    /// the number of values added into it. 
-  void AddInlineAsmOperands(unsigned Kind, bool HasMatching, +  void AddInlineAsmOperands(unsigned Code, bool HasMatching,                              unsigned MatchingIdx, const SDLoc &dl,                              SelectionDAG &DAG, std::vector<SDValue> &Ops) const; + +  /// Check if the total RegCount is greater than one. +  bool occupiesMultipleRegs() const { +    return std::accumulate(RegCount.begin(), RegCount.end(), 0) > 1; +  } + +  /// Return a list of registers and their sizes. +  SmallVector<std::pair<unsigned, unsigned>, 4> getRegsAndSizes() const;  };  } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index dd30dc16378c..fa341e8b5fa5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -20,7 +20,6 @@  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineConstantPool.h"  #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/SelectionDAG.h"  #include "llvm/CodeGen/SelectionDAGNodes.h"  #include "llvm/CodeGen/TargetInstrInfo.h" @@ -28,18 +27,21 @@  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DebugInfoMetadata.h"  #include "llvm/IR/DebugLoc.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/ModuleSlotTracker.h"  #include "llvm/IR/Value.h"  #include "llvm/Support/Casting.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/Printable.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Target/TargetIntrinsicInfo.h" @@ -85,6 +87,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {    case ISD::ATOMIC_LOAD_ADD:            return "AtomicLoadAdd";    case ISD::ATOMIC_LOAD_SUB:            return "AtomicLoadSub";    case ISD::ATOMIC_LOAD_AND:            return "AtomicLoadAnd"; +  case ISD::ATOMIC_LOAD_CLR:            return "AtomicLoadClr";    case ISD::ATOMIC_LOAD_OR:             return "AtomicLoadOr";    case ISD::ATOMIC_LOAD_XOR:            return "AtomicLoadXor";    case ISD::ATOMIC_LOAD_NAND:           return "AtomicLoadNand"; @@ -176,20 +179,30 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {    case ISD::FMAXNAN:                    return "fmaxnan";    case ISD::FNEG:                       return "fneg";    case ISD::FSQRT:                      return "fsqrt"; +  case ISD::STRICT_FSQRT:               return "strict_fsqrt";    case ISD::FSIN:                       return "fsin"; +  case ISD::STRICT_FSIN:                return "strict_fsin";    case ISD::FCOS:                       return "fcos"; +  case ISD::STRICT_FCOS:                return "strict_fcos";    case ISD::FSINCOS:                    return "fsincos";    case ISD::FTRUNC:                     return "ftrunc";    case ISD::FFLOOR:                     return "ffloor";    case ISD::FCEIL:                      return "fceil";    case ISD::FRINT:                      return "frint"; +  case ISD::STRICT_FRINT:               return "strict_frint";    case ISD::FNEARBYINT:                 return 
"fnearbyint"; +  case ISD::STRICT_FNEARBYINT:          return "strict_fnearbyint";    case ISD::FROUND:                     return "fround";    case ISD::FEXP:                       return "fexp"; +  case ISD::STRICT_FEXP:                return "strict_fexp";    case ISD::FEXP2:                      return "fexp2"; +  case ISD::STRICT_FEXP2:               return "strict_fexp2";    case ISD::FLOG:                       return "flog"; +  case ISD::STRICT_FLOG:                return "strict_flog";    case ISD::FLOG2:                      return "flog2"; +  case ISD::STRICT_FLOG2:               return "strict_flog2";    case ISD::FLOG10:                     return "flog10"; +  case ISD::STRICT_FLOG10:              return "strict_flog10";    // Binary operators    case ISD::ADD:                        return "add"; @@ -214,24 +227,31 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {    case ISD::ROTL:                       return "rotl";    case ISD::ROTR:                       return "rotr";    case ISD::FADD:                       return "fadd"; +  case ISD::STRICT_FADD:                return "strict_fadd";    case ISD::FSUB:                       return "fsub"; +  case ISD::STRICT_FSUB:                return "strict_fsub";    case ISD::FMUL:                       return "fmul"; +  case ISD::STRICT_FMUL:                return "strict_fmul";    case ISD::FDIV:                       return "fdiv"; +  case ISD::STRICT_FDIV:                return "strict_fdiv";    case ISD::FMA:                        return "fma"; +  case ISD::STRICT_FMA:                 return "strict_fma";    case ISD::FMAD:                       return "fmad";    case ISD::FREM:                       return "frem"; +  case ISD::STRICT_FREM:                return "strict_frem";    case ISD::FCOPYSIGN:                  return "fcopysign";    case ISD::FGETSIGN:                   return "fgetsign";    case ISD::FCANONICALIZE:              return "fcanonicalize";    case ISD::FPOW:                       return "fpow"; +  case ISD::STRICT_FPOW:                return "strict_fpow";    case ISD::SMIN:                       return "smin";    case ISD::SMAX:                       return "smax";    case ISD::UMIN:                       return "umin";    case ISD::UMAX:                       return "umax";    case ISD::FPOWI:                      return "fpowi"; +  case ISD::STRICT_FPOWI:               return "strict_fpowi";    case ISD::SETCC:                      return "setcc"; -  case ISD::SETCCE:                     return "setcce";    case ISD::SETCCCARRY:                 return "setcccarry";    case ISD::SELECT:                     return "select";    case ISD::VSELECT:                    return "vselect"; @@ -366,7 +386,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {      case ISD::SETFALSE2:                return "setfalse2";      }    case ISD::VECREDUCE_FADD:             return "vecreduce_fadd"; +  case ISD::VECREDUCE_STRICT_FADD:      return "vecreduce_strict_fadd";    case ISD::VECREDUCE_FMUL:             return "vecreduce_fmul"; +  case ISD::VECREDUCE_STRICT_FMUL:      return "vecreduce_strict_fmul";    case ISD::VECREDUCE_ADD:              return "vecreduce_add";    case ISD::VECREDUCE_MUL:              return "vecreduce_mul";    case ISD::VECREDUCE_AND:              return "vecreduce_and"; @@ -401,6 +423,32 @@ static Printable PrintNodeId(const SDNode &Node) {    });  } +// Print the MMO with more information from the SelectionDAG. 
+static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, +                            const MachineFunction *MF, const Module *M, +                            const MachineFrameInfo *MFI, +                            const TargetInstrInfo *TII, LLVMContext &Ctx) { +  ModuleSlotTracker MST(M); +  if (MF) +    MST.incorporateFunction(MF->getFunction()); +  SmallVector<StringRef, 0> SSNs; +  MMO.print(OS, MST, SSNs, Ctx, MFI, TII); +} + +static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, +                            const SelectionDAG *G) { +  if (G) { +    const MachineFunction *MF = &G->getMachineFunction(); +    return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(), +                           &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(), +                           *G->getContext()); +  } else { +    LLVMContext Ctx; +    return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr, +                           /*MFI=*/nullptr, /*TII=*/nullptr, Ctx); +  } +} +  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)  LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); } @@ -430,9 +478,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {    if (getFlags().hasExact())      OS << " exact"; -  if (getFlags().hasUnsafeAlgebra()) -    OS << " unsafe"; -    if (getFlags().hasNoNaNs())      OS << " nnan"; @@ -448,6 +493,12 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {    if (getFlags().hasAllowContract())      OS << " contract"; +  if (getFlags().hasApproximateFuncs()) +    OS << " afn"; + +  if (getFlags().hasAllowReassociation()) +    OS << " reassoc"; +    if (getFlags().hasVectorReduction())      OS << " vector-reduction"; @@ -457,7 +508,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {        OS << "Mem:";        for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),             e = MN->memoperands_end(); i != e; ++i) { -        OS << **i; +        printMemOperand(OS, **i, G);          if (std::next(i) != e)            OS << " ";        } @@ -549,7 +600,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {      OS << ":" << N->getVT().getEVTString();    }    else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { -    OS << "<" << *LD->getMemOperand(); +    OS << "<"; + +    printMemOperand(OS, *LD->getMemOperand(), G);      bool doExt = true;      switch (LD->getExtensionType()) { @@ -567,7 +620,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {      OS << ">";    } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { -    OS << "<" << *ST->getMemOperand(); +    OS << "<"; +    printMemOperand(OS, *ST->getMemOperand(), G);      if (ST->isTruncatingStore())        OS << ", trunc to " << ST->getMemoryVT().getEVTString(); @@ -578,7 +632,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {      OS << ">";    } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { -    OS << "<" << *M->getMemOperand() << ">"; +    OS << "<"; +    printMemOperand(OS, *M->getMemOperand(), G); +    OS << ">";    } else if (const BlockAddressSDNode *BA =                 dyn_cast<BlockAddressSDNode>(this)) {      int64_t offset = BA->getOffset(); @@ -608,6 +664,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {      if (getNodeId() != -1)        OS << " [ID=" << getNodeId() << ']'; +    if (!(isa<ConstantSDNode>(this) || 
(isa<ConstantFPSDNode>(this)))) +      OS << "# D:" << isDivergent();      if (!G)        return; @@ -779,4 +837,8 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {      if (i) OS << ", "; else OS << " ";      printOperand(OS, G, getOperand(i));    } +  if (DebugLoc DL = getDebugLoc()) { +    OS << ", "; +    DL.print(OS); +  }  } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index bd9fcfb5c1e8..f7bd8847bee3 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -29,6 +29,7 @@  #include "llvm/Analysis/CFG.h"  #include "llvm/Analysis/OptimizationRemarkEmitter.h"  #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h"  #include "llvm/CodeGen/FastISel.h"  #include "llvm/CodeGen/FunctionLoweringInfo.h"  #include "llvm/CodeGen/GCMetadata.h" @@ -43,7 +44,6 @@  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachinePassRegistry.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/SchedulerRegistry.h"  #include "llvm/CodeGen/SelectionDAG.h"  #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -82,6 +82,7 @@  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/Timer.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Target/TargetIntrinsicInfo.h" @@ -196,7 +197,7 @@ defaultListDAGScheduler("default", "Best scheduler for the target",  namespace llvm {    //===--------------------------------------------------------------------===// -  /// \brief This class is used by SelectionDAGISel to temporarily override +  /// This class is used by SelectionDAGISel to temporarily override    /// the optimization level on a per-function basis.    class OptLevelChanger {      SelectionDAGISel &IS; @@ -211,26 +212,27 @@ namespace llvm {          return;        IS.OptLevel = NewOptLevel;        IS.TM.setOptLevel(NewOptLevel); -      DEBUG(dbgs() << "\nChanging optimization level for Function " -            << IS.MF->getFunction().getName() << "\n"); -      DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel -            << " ; After: -O" << NewOptLevel << "\n"); +      LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function " +                        << IS.MF->getFunction().getName() << "\n"); +      LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" +                        << NewOptLevel << "\n");        SavedFastISel = IS.TM.Options.EnableFastISel;        if (NewOptLevel == CodeGenOpt::None) {          IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); -        DEBUG(dbgs() << "\tFastISel is " -              << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled") -              << "\n"); +        LLVM_DEBUG( +            dbgs() << "\tFastISel is " +                   << (IS.TM.Options.EnableFastISel ? 
"enabled" : "disabled") +                   << "\n");        }      }      ~OptLevelChanger() {        if (IS.OptLevel == SavedOptLevel)          return; -      DEBUG(dbgs() << "\nRestoring optimization level for Function " -            << IS.MF->getFunction().getName() << "\n"); -      DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel -            << " ; After: -O" << SavedOptLevel << "\n"); +      LLVM_DEBUG(dbgs() << "\nRestoring optimization level for Function " +                        << IS.MF->getFunction().getName() << "\n"); +      LLVM_DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O" +                        << SavedOptLevel << "\n");        IS.OptLevel = SavedOptLevel;        IS.TM.setOptLevel(SavedOptLevel);        IS.TM.setFastISel(SavedFastISel); @@ -326,9 +328,9 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {      AU.addRequired<AAResultsWrapperPass>();    AU.addRequired<GCModuleInfo>();    AU.addRequired<StackProtector>(); -  AU.addPreserved<StackProtector>();    AU.addPreserved<GCModuleInfo>();    AU.addRequired<TargetLibraryInfoWrapperPass>(); +  AU.addRequired<TargetTransformInfoWrapperPass>();    if (UseMBPI && OptLevel != CodeGenOpt::None)      AU.addRequired<BranchProbabilityInfoWrapperPass>();    MachineFunctionPass::getAnalysisUsage(AU); @@ -410,11 +412,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {    auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();    LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; -  DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); +  LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");    SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); -  CurDAG->init(*MF, *ORE, this); +  CurDAG->init(*MF, *ORE, this, LibInfo, +   getAnalysisIfAvailable<DivergenceAnalysis>());    FuncInfo->set(Fn, *MF, CurDAG);    // Now get the optional analyzes if we want to. @@ -513,8 +516,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {          // FIXME: VR def may not be in entry block.          Def->getParent()->insert(std::next(InsertPos), MI);        } else -        DEBUG(dbgs() << "Dropping debug info for dead vreg" -              << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); +        LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg" +                          << TargetRegisterInfo::virtReg2Index(Reg) << "\n");      }      // If Reg is live-in then update debug info to track its copy in a vreg. @@ -621,8 +624,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {    // at this point.    FuncInfo->clear(); -  DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); -  DEBUG(MF->print(dbgs())); +  LLVM_DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); +  LLVM_DEBUG(MF->print(dbgs()));    return true;  } @@ -711,6 +714,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {    int BlockNumber = -1;    (void)BlockNumber;    bool MatchFilterBB = false; (void)MatchFilterBB; +  TargetTransformInfo &TTI = +      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn);    // Pre-type legalization allow creation of any node types.    
CurDAG->NewNodesMustHaveLegalTypes = false; @@ -718,7 +723,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {  #ifndef NDEBUG    MatchFilterBB = (FilterDAGBasicBlockName.empty() ||                     FilterDAGBasicBlockName == -                       FuncInfo->MBB->getBasicBlock()->getName().str()); +                       FuncInfo->MBB->getBasicBlock()->getName());  #endif  #ifdef NDEBUG    if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || @@ -730,9 +735,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {      BlockName =          (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();    } -  DEBUG(dbgs() << "Initial selection DAG: " << printMBBReference(*FuncInfo->MBB) -               << " '" << BlockName << "'\n"; -        CurDAG->dump()); +  LLVM_DEBUG(dbgs() << "Initial selection DAG: " +                    << printMBBReference(*FuncInfo->MBB) << " '" << BlockName +                    << "'\n"; +             CurDAG->dump());    if (ViewDAGCombine1 && MatchFilterBB)      CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -744,10 +750,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {      CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);    } -  DEBUG(dbgs() << "Optimized lowered selection DAG: " -               << printMBBReference(*FuncInfo->MBB) << " '" << BlockName -               << "'\n"; -        CurDAG->dump()); +  if (TTI.hasBranchDivergence()) +    CurDAG->VerifyDAGDiverence(); + +  LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: " +                    << printMBBReference(*FuncInfo->MBB) << " '" << BlockName +                    << "'\n"; +             CurDAG->dump());    // Second step, hack on the DAG until it only uses operations and types that    // the target supports. @@ -761,10 +770,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {      Changed = CurDAG->LegalizeTypes();    } -  DEBUG(dbgs() << "Type-legalized selection DAG: " -               << printMBBReference(*FuncInfo->MBB) << " '" << BlockName -               << "'\n"; -        CurDAG->dump()); +  if (TTI.hasBranchDivergence()) +    CurDAG->VerifyDAGDiverence(); + +  LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: " +                    << printMBBReference(*FuncInfo->MBB) << " '" << BlockName +                    << "'\n"; +             CurDAG->dump());    // Only allow creation of legal node types.    
CurDAG->NewNodesMustHaveLegalTypes = true; @@ -780,10 +792,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {        CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);      } -    DEBUG(dbgs() << "Optimized type-legalized selection DAG: " -                 << printMBBReference(*FuncInfo->MBB) << " '" << BlockName -                 << "'\n"; -          CurDAG->dump()); +    if (TTI.hasBranchDivergence()) +      CurDAG->VerifyDAGDiverence(); + +    LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: " +                      << printMBBReference(*FuncInfo->MBB) << " '" << BlockName +                      << "'\n"; +               CurDAG->dump());    }    { @@ -793,10 +808,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {    }    if (Changed) { -    DEBUG(dbgs() << "Vector-legalized selection DAG: " -                 << printMBBReference(*FuncInfo->MBB) << " '" << BlockName -                 << "'\n"; -          CurDAG->dump()); +    LLVM_DEBUG(dbgs() << "Vector-legalized selection DAG: " +                      << printMBBReference(*FuncInfo->MBB) << " '" << BlockName +                      << "'\n"; +               CurDAG->dump());      {        NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, @@ -804,10 +819,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {        CurDAG->LegalizeTypes();      } -    DEBUG(dbgs() << "Vector/type-legalized selection DAG: " -                 << printMBBReference(*FuncInfo->MBB) << " '" << BlockName -                 << "'\n"; -          CurDAG->dump()); +    LLVM_DEBUG(dbgs() << "Vector/type-legalized selection DAG: " +                      << printMBBReference(*FuncInfo->MBB) << " '" << BlockName +                      << "'\n"; +               CurDAG->dump());      if (ViewDAGCombineLT && MatchFilterBB)        CurDAG->viewGraph("dag-combine-lv input for " + BlockName); @@ -819,10 +834,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {        CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);      } -    DEBUG(dbgs() << "Optimized vector-legalized selection DAG: " -                 << printMBBReference(*FuncInfo->MBB) << " '" << BlockName -                 << "'\n"; -          CurDAG->dump()); +    LLVM_DEBUG(dbgs() << "Optimized vector-legalized selection DAG: " +                      << printMBBReference(*FuncInfo->MBB) << " '" << BlockName +                      << "'\n"; +               CurDAG->dump()); + +    if (TTI.hasBranchDivergence()) +      CurDAG->VerifyDAGDiverence();    }    if (ViewLegalizeDAGs && MatchFilterBB) @@ -834,10 +852,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {      CurDAG->Legalize();    } -  DEBUG(dbgs() << "Legalized selection DAG: " -               << printMBBReference(*FuncInfo->MBB) << " '" << BlockName -               << "'\n"; -        CurDAG->dump()); +  if (TTI.hasBranchDivergence()) +    CurDAG->VerifyDAGDiverence(); + +  LLVM_DEBUG(dbgs() << "Legalized selection DAG: " +                    << printMBBReference(*FuncInfo->MBB) << " '" << BlockName +                    << "'\n"; +             CurDAG->dump());    if (ViewDAGCombine2 && MatchFilterBB)      CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -849,10 +870,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {      CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);    } -  DEBUG(dbgs() << "Optimized legalized selection DAG: " -               << printMBBReference(*FuncInfo->MBB) << " '" << BlockName -               << "'\n"; -        CurDAG->dump()); +  if (TTI.hasBranchDivergence()) +    
CurDAG->VerifyDAGDiverence();
+
+  LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: "
+                    << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+                    << "'\n";
+             CurDAG->dump());

   if (OptLevel != CodeGenOpt::None)
     ComputeLiveOutVRegInfo();
@@ -868,10 +892,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     DoInstructionSelection();
   }

-  DEBUG(dbgs() << "Selected selection DAG: "
-               << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
-               << "'\n";
-        CurDAG->dump());
+  LLVM_DEBUG(dbgs() << "Selected selection DAG: "
+                    << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+                    << "'\n";
+             CurDAG->dump());

   if (ViewSchedDAGs && MatchFilterBB)
     CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -937,10 +961,62 @@ public:

 } // end anonymous namespace

+// This function is used to enforce the topological node id property
+// leveraged during instruction selection. Before selection, all nodes are
+// given a non-negative id such that all nodes have a larger id than their
+// operands. As this holds transitively we can prune checks that a node N is
+// a predecessor of another node M by not recursively checking through M's
+// operands if N's ID is larger than M's ID. This significantly improves
+// performance of various legality checks (e.g. IsLegalToFold /
+// UpdateChains).
+
+// However, when we fuse multiple nodes into a single node during selection
+// we may induce a predecessor relationship between inputs and outputs of
+// distinct nodes being merged, violating the topological property. Should a
+// fused node have a successor which has yet to be selected, our legality
+// checks would be incorrect. To avoid this we mark all unselected successor
+// nodes, i.e. those with id != -1, as invalid for pruning by bit-negating
+// their ids (x => -(x+1)) and modify our pruning check to ignore negative
+// ids on M. We use bit-negation to more clearly enforce that node id -1 can
+// only be achieved by selected nodes. As the conversion is reversible to
+// the original id, topological pruning can still be leveraged when looking
+// for unselected nodes. This method is called internally by all ISel
+// replacement calls.
+void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
+  SmallVector<SDNode *, 4> Nodes;
+  Nodes.push_back(Node);
+
+  while (!Nodes.empty()) {
+    SDNode *N = Nodes.pop_back_val();
+    for (auto *U : N->uses()) {
+      auto UId = U->getNodeId();
+      if (UId > 0) {
+        InvalidateNodeId(U);
+        Nodes.push_back(U);
+      }
+    }
+  }
+}
+
+// InvalidateNodeId - As discussed in EnforceNodeIdInvariant, mark a
+// NodeId with the equivalent node id which is invalid for topological
+// pruning.
+void SelectionDAGISel::InvalidateNodeId(SDNode *N) {
+  int InvalidId = -(N->getNodeId() + 1);
+  N->setNodeId(InvalidId);
+}
+
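A tiny self-contained model of the id encoding described above (plain C++, not LLVM API). Note that EnforceNodeIdInvariant only invalidates ids strictly greater than zero, since -(0+1) would collide with -1, the marker for already-selected nodes:

#include <cassert>

static int invalidate(int Id) { return -(Id + 1); }                 // x => -(x+1)
static int uninvalidate(int Id) { return Id < -1 ? -(Id + 1) : Id; }

int main() {
  int Id = 42;
  int Inv = invalidate(Id);        // -43: flagged invalid for pruning
  assert(Inv < -1);                // never collides with the -1 marker
  assert(uninvalidate(Inv) == Id); // the encoding is reversible
  assert(uninvalidate(-1) == -1);  // selected nodes pass through unchanged
}

+// getUninvalidatedNodeId - get the original, uninvalidated node id.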
+int SelectionDAGISel::getUninvalidatedNodeId(SDNode *N) {
+  int Id = N->getNodeId();
+  if (Id < -1)
+    return -(Id + 1);
+  return Id;
+}
+
 void SelectionDAGISel::DoInstructionSelection() {
-  DEBUG(dbgs() << "===== Instruction selection begins: "
-               << printMBBReference(*FuncInfo->MBB) << " '"
-               << FuncInfo->MBB->getName() << "'\n");
+  LLVM_DEBUG(dbgs() << "===== Instruction selection begins: "
+                    << printMBBReference(*FuncInfo->MBB) << " '"
+                    << FuncInfo->MBB->getName() << "'\n");

   PreprocessISelDAG();
@@ -972,6 +1048,33 @@ void SelectionDAGISel::DoInstructionSelection() {
       if (Node->use_empty())
         continue;

+#ifndef NDEBUG
+      SmallVector<SDNode *, 4> Nodes;
+      Nodes.push_back(Node);
+
+      while (!Nodes.empty()) {
+        auto N = Nodes.pop_back_val();
+        if (N->getOpcode() == ISD::TokenFactor || N->getNodeId() < 0)
+          continue;
+        for (const SDValue &Op : N->op_values()) {
+          if (Op->getOpcode() == ISD::TokenFactor)
+            Nodes.push_back(Op.getNode());
+          else {
+            // We rely on topological ordering of node ids for checking for
+            // cycles when fusing nodes during selection. All unselected node
+            // successors of an already selected node should have a negative
+            // id. This assertion will catch such cases. If this assertion
+            // triggers it is likely you are using DAG-level Value/Node
+            // replacement functions (versus equivalent ISEL replacement) in
+            // backend-specific selections. See comment in
+            // EnforceNodeIdInvariant for more details.
+            assert(Op->getNodeId() != -1 &&
+                   "Node has already selected predecessor node");
+          }
+        }
+      }
+#endif
+
       // When we are using non-default rounding modes or FP exception behavior
       // FP operations are represented by StrictFP pseudo-operations.  They
       // need to be simplified here so that the target-specific instruction
@@ -985,13 +1088,16 @@ void SelectionDAGISel::DoInstructionSelection() {
       if (Node->isStrictFPOpcode())
         Node = CurDAG->mutateStrictFPToFP(Node);

+      LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
+                 Node->dump(CurDAG));
+
       Select(Node);
     }

     CurDAG->setRoot(Dummy.getValue());
   }

-  DEBUG(dbgs() << "===== Instruction selection ends:\n");
+  LLVM_DEBUG(dbgs() << "\n===== Instruction selection ends:\n");

   PostprocessISelDAG();
 }
@@ -1264,7 +1370,7 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
       }

       auto DLoc = isa<Instruction>(SwiftErrorVal)
-                      ? dyn_cast<Instruction>(SwiftErrorVal)->getDebugLoc()
+                      ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
                       : DebugLoc();
       const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo();
@@ -1381,7 +1487,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
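Almost every hunk in this file performs the mechanical DEBUG to LLVM_DEBUG rename; for reference, a minimal model of how the new macro is used (a DEBUG_TYPE must be defined before including Debug.h, and the output appears only in asserts builds under -debug or -debug-only=isel):

#define DEBUG_TYPE "isel"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

void reportReg(unsigned Reg) {
  // Compiled out entirely in release (NDEBUG) builds.
  LLVM_DEBUG(llvm::dbgs() << "processing vreg " << Reg << "\n");
}

   // Initialize the Fast-ISel state, if needed.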
FastISel *FastIS = nullptr;    if (TM.Options.EnableFastISel) { -    DEBUG(dbgs() << "Enabling fast-isel\n"); +    LLVM_DEBUG(dbgs() << "Enabling fast-isel\n");      FastIS = TLI->createFastISel(*FuncInfo, LibInfo);    } @@ -1398,6 +1504,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {    FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];    FuncInfo->InsertPt = FuncInfo->MBB->begin(); +  CurDAG->setFunctionLoweringInfo(FuncInfo); +    if (!FastIS) {      LowerArguments(Fn);    } else { @@ -1435,6 +1543,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {    processDbgDeclares(FuncInfo);    // Iterate over all basic blocks in the function. +  StackProtector &SP = getAnalysis<StackProtector>();    for (const BasicBlock *LLVMBB : RPOT) {      if (OptLevel != CodeGenOpt::None) {        bool AllPredsVisited = true; @@ -1604,7 +1713,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {        FastIS->recomputeInsertPt();      } -    if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) { +    if (SP.shouldEmitSDCheck(*LLVMBB)) {        bool FunctionBasedInstrumentation =            TLI->getSSPStackGuardCheck(*Fn.getParent());        SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB], @@ -1630,11 +1739,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {          FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end());      } +    if (FastIS) +      FastIS->finishBasicBlock();      FinishBasicBlock();      FuncInfo->PHINodesToUpdate.clear();      ElidedArgCopyInstrs.clear();    } +  SP.copyToMachineFrameInfo(MF->getFrameInfo()); +    propagateSwiftErrorVRegs(FuncInfo);    delete FastIS; @@ -1728,12 +1841,12 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) {  void  SelectionDAGISel::FinishBasicBlock() { -  DEBUG(dbgs() << "Total amount of phi nodes to update: " -               << FuncInfo->PHINodesToUpdate.size() << "\n"; -        for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) -          dbgs() << "Node " << i << " : (" -                 << FuncInfo->PHINodesToUpdate[i].first -                 << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); +  LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: " +                    << FuncInfo->PHINodesToUpdate.size() << "\n"; +             for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; +                  ++i) dbgs() +             << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first +             << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");    // Next, now that we know what the last MBB the LLVM BB expanded is, update    // PHI nodes in successors. @@ -2012,7 +2125,7 @@ bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,      return true;    // If the actual AND mask is allowing unallowed bits, this doesn't match. -  if (ActualMask.intersects(~DesiredMask)) +  if (!ActualMask.isSubsetOf(DesiredMask))      return false;    // Otherwise, the DAG Combiner may have proven that the value coming in is @@ -2041,7 +2154,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,      return true;    // If the actual AND mask is allowing unallowed bits, this doesn't match. 
-  if (ActualMask.intersects(~DesiredMask)) +  if (!ActualMask.isSubsetOf(DesiredMask))      return false;    // Otherwise, the DAG Combiner may have proven that the value coming in is @@ -2134,52 +2247,44 @@ static SDNode *findGlueUse(SDNode *N) {    return nullptr;  } -/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". -/// This function iteratively traverses up the operand chain, ignoring -/// certain nodes. -static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, -                          SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited, +/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path +/// beyond "ImmedUse".  We may ignore chains as they are checked separately. +static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,                            bool IgnoreChains) { -  // The NodeID's are given uniques ID's where a node ID is guaranteed to be -  // greater than all of its (recursive) operands.  If we scan to a point where -  // 'use' is smaller than the node we're scanning for, then we know we will -  // never find it. -  // -  // The Use may be -1 (unassigned) if it is a newly allocated node.  This can -  // happen because we scan down to newly selected nodes in the case of glue -  // uses. -  std::vector<SDNode *> WorkList; -  WorkList.push_back(Use); - -  while (!WorkList.empty()) { -    Use = WorkList.back(); -    WorkList.pop_back(); -    if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1) -      continue; +  SmallPtrSet<const SDNode *, 16> Visited; +  SmallVector<const SDNode *, 16> WorkList; +  // Only check if we have non-immediate uses of Def. +  if (ImmedUse->isOnlyUserOf(Def)) +    return false; -    // Don't revisit nodes if we already scanned it and didn't fail, we know we -    // won't fail if we scan it again. -    if (!Visited.insert(Use).second) +  // We don't care about paths to Def that go through ImmedUse so mark it +  // visited and mark non-def operands as used. +  Visited.insert(ImmedUse); +  for (const SDValue &Op : ImmedUse->op_values()) { +    SDNode *N = Op.getNode(); +    // Ignore chain deps (they are validated by +    // HandleMergeInputChains) and immediate uses +    if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)        continue; +    if (!Visited.insert(N).second) +      continue; +    WorkList.push_back(N); +  } -    for (const SDValue &Op : Use->op_values()) { -      // Ignore chain uses, they are validated by HandleMergeInputChains. -      if (Op.getValueType() == MVT::Other && IgnoreChains) -        continue; - +  // Initialize worklist to operands of Root. +  if (Root != ImmedUse) { +    for (const SDValue &Op : Root->op_values()) {        SDNode *N = Op.getNode(); -      if (N == Def) { -        if (Use == ImmedUse || Use == Root) -          continue;  // We are not looking for immediate use. -        assert(N != Root); -        return true; -      } - -      // Traverse up the operand chain. 
+      // Ignore chains (they are validated by HandleMergeInputChains)
+      if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
+        continue;
+      if (!Visited.insert(N).second)
+        continue;
       WorkList.push_back(N);
     }
   }
-  return false;
+
+  return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true);
 }
 
 /// IsProfitableToFold - Returns true if it's profitable to fold the specific
@@ -2199,7 +2304,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
 
   // If Root use can somehow reach N through a path that doesn't contain
   // U then folding N would create a cycle. e.g. In the following
-  // diagram, Root can reach N through X. If N is folded into into Root, then
+  // diagram, Root can reach N through X. If N is folded into Root, then
   // X is both a predecessor and a successor of U.
   //
   //          [N*]           //
@@ -2251,13 +2356,12 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
 
     // If our query node has a glue result with a use, we've walked up it.  If
     // the user (which has already been selected) has a chain or indirectly uses
-    // the chain, our WalkChainUsers predicate will not consider it.  Because of
+    // the chain, HandleMergeInputChains will not consider it.  Because of
    // this, we cannot ignore chains in this predicate.
    IgnoreChains = false;
   }
 
-  SmallPtrSet<SDNode*, 16> Visited;
-  return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
+  return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
 }
 
 void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
@@ -2360,7 +2464,8 @@ void SelectionDAGISel::UpdateChains(
             std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N,
                          static_cast<SDNode *>(nullptr));
           });
-      CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);
+      if (ChainNode->getOpcode() != ISD::TokenFactor)
+        ReplaceUses(ChainVal, InputChain);
 
       // If the node became dead and we haven't already seen it, delete it.
       if (ChainNode != NodeToMatch && ChainNode->use_empty() &&
@@ -2372,144 +2477,7 @@ void SelectionDAGISel::UpdateChains(
   if (!NowDeadNodes.empty())
     CurDAG->RemoveDeadNodes(NowDeadNodes);
 
-  DEBUG(dbgs() << "ISEL: Match complete!\n");
-}
-
-enum ChainResult {
-  CR_Simple,
-  CR_InducesCycle,
-  CR_LeadsToInteriorNode
-};
-
-/// WalkChainUsers - Walk down the users of the specified chained node that is
-/// part of the pattern we're matching, looking at all of the users we find.
-/// This determines whether something is an interior node, whether we have a
-/// non-pattern node in between two pattern nodes (which prevent folding because
-/// it would induce a cycle) and whether we have a TokenFactor node sandwiched
-/// between pattern nodes (in which case the TF becomes part of the pattern).
-///
-/// The walk we do here is guaranteed to be small because we quickly get down to
-/// already selected nodes "below" us.
-static ChainResult
-WalkChainUsers(const SDNode *ChainedNode,
-               SmallVectorImpl<SDNode *> &ChainedNodesInPattern,
-               DenseMap<const SDNode *, ChainResult> &TokenFactorResult,
-               SmallVectorImpl<SDNode *> &InteriorChainedNodes) {
-  ChainResult Result = CR_Simple;
-
-  for (SDNode::use_iterator UI = ChainedNode->use_begin(),
-         E = ChainedNode->use_end(); UI != E; ++UI) {
-    // Make sure the use is of the chain, not some other value we produce.
-    if (UI.getUse().getValueType() != MVT::Other) continue; - -    SDNode *User = *UI; - -    if (User->getOpcode() == ISD::HANDLENODE)  // Root of the graph. -      continue; - -    // If we see an already-selected machine node, then we've gone beyond the -    // pattern that we're selecting down into the already selected chunk of the -    // DAG. -    unsigned UserOpcode = User->getOpcode(); -    if (User->isMachineOpcode() || -        UserOpcode == ISD::CopyToReg || -        UserOpcode == ISD::CopyFromReg || -        UserOpcode == ISD::INLINEASM || -        UserOpcode == ISD::EH_LABEL || -        UserOpcode == ISD::LIFETIME_START || -        UserOpcode == ISD::LIFETIME_END) { -      // If their node ID got reset to -1 then they've already been selected. -      // Treat them like a MachineOpcode. -      if (User->getNodeId() == -1) -        continue; -    } - -    // If we have a TokenFactor, we handle it specially. -    if (User->getOpcode() != ISD::TokenFactor) { -      // If the node isn't a token factor and isn't part of our pattern, then it -      // must be a random chained node in between two nodes we're selecting. -      // This happens when we have something like: -      //   x = load ptr -      //   call -      //   y = x+4 -      //   store y -> ptr -      // Because we structurally match the load/store as a read/modify/write, -      // but the call is chained between them.  We cannot fold in this case -      // because it would induce a cycle in the graph. -      if (!std::count(ChainedNodesInPattern.begin(), -                      ChainedNodesInPattern.end(), User)) -        return CR_InducesCycle; - -      // Otherwise we found a node that is part of our pattern.  For example in: -      //   x = load ptr -      //   y = x+4 -      //   store y -> ptr -      // This would happen when we're scanning down from the load and see the -      // store as a user.  Record that there is a use of ChainedNode that is -      // part of the pattern and keep scanning uses. -      Result = CR_LeadsToInteriorNode; -      InteriorChainedNodes.push_back(User); -      continue; -    } - -    // If we found a TokenFactor, there are two cases to consider: first if the -    // TokenFactor is just hanging "below" the pattern we're matching (i.e. no -    // uses of the TF are in our pattern) we just want to ignore it.  Second, -    // the TokenFactor can be sandwiched in between two chained nodes, like so: -    //     [Load chain] -    //         ^ -    //         | -    //       [Load] -    //       ^    ^ -    //       |    \                    DAG's like cheese -    //      /       \                       do you? -    //     /         | -    // [TokenFactor] [Op] -    //     ^          ^ -    //     |          | -    //      \        / -    //       \      / -    //       [Store] -    // -    // In this case, the TokenFactor becomes part of our match and we rewrite it -    // as a new TokenFactor. -    // -    // To distinguish these two cases, do a recursive walk down the uses. -    auto MemoizeResult = TokenFactorResult.find(User); -    bool Visited = MemoizeResult != TokenFactorResult.end(); -    // Recursively walk chain users only if the result is not memoized. 
-    if (!Visited) { -      auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult, -                                InteriorChainedNodes); -      MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first; -    } -    switch (MemoizeResult->second) { -    case CR_Simple: -      // If the uses of the TokenFactor are just already-selected nodes, ignore -      // it, it is "below" our pattern. -      continue; -    case CR_InducesCycle: -      // If the uses of the TokenFactor lead to nodes that are not part of our -      // pattern that are not selected, folding would turn this into a cycle, -      // bail out now. -      return CR_InducesCycle; -    case CR_LeadsToInteriorNode: -      break;  // Otherwise, keep processing. -    } - -    // Okay, we know we're in the interesting interior case.  The TokenFactor -    // is now going to be considered part of the pattern so that we rewrite its -    // uses (it may have uses that are not part of the pattern) with the -    // ultimate chain result of the generated code.  We will also add its chain -    // inputs as inputs to the ultimate TokenFactor we create. -    Result = CR_LeadsToInteriorNode; -    if (!Visited) { -      ChainedNodesInPattern.push_back(User); -      InteriorChainedNodes.push_back(User); -    } -  } - -  return Result; +  LLVM_DEBUG(dbgs() << "ISEL: Match complete!\n");  }  /// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains @@ -2521,47 +2489,56 @@ WalkChainUsers(const SDNode *ChainedNode,  static SDValue  HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,                         SelectionDAG *CurDAG) { -  // Used for memoization. Without it WalkChainUsers could take exponential -  // time to run. -  DenseMap<const SDNode *, ChainResult> TokenFactorResult; -  // Walk all of the chained nodes we've matched, recursively scanning down the -  // users of the chain result. This adds any TokenFactor nodes that are caught -  // in between chained nodes to the chained and interior nodes list. -  SmallVector<SDNode*, 3> InteriorChainedNodes; -  for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { -    if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched, -                       TokenFactorResult, -                       InteriorChainedNodes) == CR_InducesCycle) -      return SDValue(); // Would induce a cycle. -  } -  // Okay, we have walked all the matched nodes and collected TokenFactor nodes -  // that we are interested in.  Form our input TokenFactor node. +  SmallPtrSet<const SDNode *, 16> Visited; +  SmallVector<const SDNode *, 8> Worklist;    SmallVector<SDValue, 3> InputChains; -  for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { -    // Add the input chain of this node to the InputChains list (which will be -    // the operands of the generated TokenFactor) if it's not an interior node. -    SDNode *N = ChainNodesMatched[i]; -    if (N->getOpcode() != ISD::TokenFactor) { -      if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) -        continue; +  unsigned int Max = 8192; -      // Otherwise, add the input chain. -      SDValue InChain = ChainNodesMatched[i]->getOperand(0); -      assert(InChain.getValueType() == MVT::Other && "Not a chain"); -      InputChains.push_back(InChain); -      continue; -    } +  // Quick exit on trivial merge. 
+  if (ChainNodesMatched.size() == 1)
+    return ChainNodesMatched[0]->getOperand(0);
 
-    // If we have a token factor, we want to add all inputs of the token factor
-    // that are not part of the pattern we're matching.
-    for (const SDValue &Op : N->op_values()) {
-      if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
-                      Op.getNode()))
-        InputChains.push_back(Op);
-    }
+  // Add chains that aren't already added (internal). Peek through
+  // token factors.
+  std::function<void(const SDValue)> AddChains = [&](const SDValue V) {
+    if (V.getValueType() != MVT::Other)
+      return;
+    if (V->getOpcode() == ISD::EntryToken)
+      return;
+    if (!Visited.insert(V.getNode()).second)
+      return;
+    if (V->getOpcode() == ISD::TokenFactor) {
+      for (const SDValue &Op : V->op_values())
+        AddChains(Op);
+    } else
+      InputChains.push_back(V);
+  };
+
+  for (auto *N : ChainNodesMatched) {
+    Worklist.push_back(N);
+    Visited.insert(N);
   }
 
+  while (!Worklist.empty())
+    AddChains(Worklist.pop_back_val()->getOperand(0));
+
+  // Skip the search if there are no chain dependencies.
+  if (InputChains.size() == 0)
+    return CurDAG->getEntryNode();
+
+  // If one of these chains is a successor of input, we must have a
+  // node that is both the predecessor and successor of the
+  // to-be-merged nodes. Fail.
+  Visited.clear();
+  for (SDValue V : InputChains)
+    Worklist.push_back(V.getNode());
+
+  for (auto *N : ChainNodesMatched)
+    if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true))
+      return SDValue();
+
+  // Return merged chain.
   if (InputChains.size() == 1)
     return InputChains[0];
   return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),
@@ -2606,8 +2583,8 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
   // Move the glue if needed.
   if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
       (unsigned)OldGlueResultNo != ResNumResults-1)
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
-                                      SDValue(Res, ResNumResults-1));
+    ReplaceUses(SDValue(Node, OldGlueResultNo),
+                SDValue(Res, ResNumResults - 1));
 
   if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
     --ResNumResults;
@@ -2615,14 +2592,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
   // Move the chain reference if needed.
   if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
       (unsigned)OldChainResultNo != ResNumResults-1)
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
-                                      SDValue(Res, ResNumResults-1));
+    ReplaceUses(SDValue(Node, OldChainResultNo),
+                SDValue(Res, ResNumResults - 1));
 
   // Otherwise, no replacement happened because the node already exists. Replace
   // Uses of the old node with the new one.
   if (Res != Node) {
-    CurDAG->ReplaceAllUsesWith(Node, Res);
-    CurDAG->RemoveDeadNode(Node);
+    ReplaceNode(Node, Res);
+  } else {
+    EnforceNodeIdInvariant(Res);
   }
 
   return Res;
@@ -2861,7 +2839,7 @@ struct MatchScope {
   bool HasChainNodesMatched;
 };
 
-/// \brief A DAG update listener to keep the matching state
+/// A DAG update listener to keep the matching state
 /// (i.e. RecordedNodes and MatchScope) up to date if the target is allowed to
 /// change the DAG while matching.  X86 addressing mode matcher is an example
 /// for this.
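
Both the rewritten findNonImmUse and the new HandleMergeInputChains above delegate the actual reachability question to SDNode::hasPredecessorHelper. As a rough mental model only (a hypothetical Node type rather than LLVM's SDNode, and omitting the topological-id pruning the real helper performs), the walk amounts to:

#include <unordered_set>
#include <vector>

struct Node {
  std::vector<Node *> Operands;
};

// Answers "is Def a (transitive) predecessor of Root?" by searching from
// Root along operand edges. A step budget bounds the search; when it is
// exhausted we conservatively report that a path may exist.
static bool hasPredecessor(const Node *Root, const Node *Def,
                           unsigned MaxSteps = 8192) {
  std::unordered_set<const Node *> Visited;
  std::vector<const Node *> Worklist = {Root};
  unsigned Steps = 0;
  while (!Worklist.empty()) {
    const Node *N = Worklist.back();
    Worklist.pop_back();
    if (MaxSteps != 0 && ++Steps > MaxSteps)
      return true; // Budget exhausted: assume a path exists.
    for (const Node *Op : N->Operands) {
      if (Op == Def)
        return true;
      if (Visited.insert(Op).second)
        Worklist.push_back(Op);
    }
  }
  return false; // All paths explored without reaching Def.
}

Seeding Visited and Worklist up front, as the callers above do, is what lets them exclude ImmedUse (or the matched chain nodes themselves) from the search.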
@@ -2939,8 +2917,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,      return;    case ISD::AssertSext:    case ISD::AssertZext: -    CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), -                                      NodeToMatch->getOperand(0)); +    ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0));      CurDAG->RemoveDeadNode(NodeToMatch);      return;    case ISD::INLINEASM: @@ -2988,9 +2965,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,    // update the chain results when the pattern is complete.    SmallVector<SDNode*, 3> ChainNodesMatched; -  DEBUG(dbgs() << "ISEL: Starting pattern match on root node: "; -        NodeToMatch->dump(CurDAG); -        dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << "ISEL: Starting pattern match\n");    // Determine where to start the interpreter.  Normally we start at opcode #0,    // but if the state machine starts with an OPC_SwitchOpcode, then we @@ -3002,7 +2977,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,      // Already computed the OpcodeOffset table, just index into it.      if (N.getOpcode() < OpcodeOffset.size())        MatcherIndex = OpcodeOffset[N.getOpcode()]; -    DEBUG(dbgs() << "  Initial Opcode index to " << MatcherIndex << "\n"); +    LLVM_DEBUG(dbgs() << "  Initial Opcode index to " << MatcherIndex << "\n");    } else if (MatcherTable[0] == OPC_SwitchOpcode) {      // Otherwise, the table isn't computed, but the state machine does start @@ -3069,9 +3044,10 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,          if (!Result)            break; -        DEBUG(dbgs() << "  Skipped scope entry (due to false predicate) at " -                     << "index " << MatcherIndexOfPredicate -                     << ", continuing at " << FailIndex << "\n"); +        LLVM_DEBUG( +            dbgs() << "  Skipped scope entry (due to false predicate) at " +                   << "index " << MatcherIndexOfPredicate << ", continuing at " +                   << FailIndex << "\n");          ++NumDAGIselRetries;          // Otherwise, we know that this case of the Scope is guaranteed to fail, @@ -3120,11 +3096,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,        if (auto *MN = dyn_cast<MemSDNode>(N))          MatchedMemRefs.push_back(MN->getMemOperand());        else { -        DEBUG( -          dbgs() << "Expected MemSDNode "; -          N->dump(CurDAG); -          dbgs() << '\n' -        ); +        LLVM_DEBUG(dbgs() << "Expected MemSDNode "; N->dump(CurDAG); +                   dbgs() << '\n');        }        continue; @@ -3245,8 +3218,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,        if (CaseSize == 0) break;        // Otherwise, execute the case we found. -      DEBUG(dbgs() << "  OpcodeSwitch from " << SwitchStart -                   << " to " << MatcherIndex << "\n"); +      LLVM_DEBUG(dbgs() << "  OpcodeSwitch from " << SwitchStart << " to " +                        << MatcherIndex << "\n");        continue;      } @@ -3277,8 +3250,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,        if (CaseSize == 0) break;        // Otherwise, execute the case we found. 
-      DEBUG(dbgs() << "  TypeSwitch[" << EVT(CurNodeVT).getEVTString() -                   << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); +      LLVM_DEBUG(dbgs() << "  TypeSwitch[" << EVT(CurNodeVT).getEVTString() +                        << "] from " << SwitchStart << " to " << MatcherIndex +                        << '\n');        continue;      }      case OPC_CheckChild0Type: case OPC_CheckChild1Type: @@ -3658,16 +3632,11 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,          Res->setMemRefs(MemRefs, MemRefs + NumMemRefs);        } -      DEBUG( -        if (!MatchedMemRefs.empty() && Res->memoperands_empty()) -          dbgs() << "  Dropping mem operands\n"; -        dbgs() << "  " -               << (IsMorphNodeTo ? "Morphed" : "Created") -               << " node: "; -        Res->dump(CurDAG); - -        dbgs() << '\n'; -      ); +      LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs() +                     << "  Dropping mem operands\n"; +                 dbgs() << "  " << (IsMorphNodeTo ? "Morphed" : "Created") +                        << " node: "; +                 Res->dump(CurDAG););        // If this was a MorphNodeTo then we're completely done!        if (IsMorphNodeTo) { @@ -3702,7 +3671,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,                  NodeToMatch->getValueType(i).getSizeInBits() ==                      Res.getValueSizeInBits()) &&                 "invalid replacement"); -        CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); +        ReplaceUses(SDValue(NodeToMatch, i), Res);        }        // Update chain uses. @@ -3715,8 +3684,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,        if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) ==                MVT::Glue &&            InputGlue.getNode()) -        CurDAG->ReplaceAllUsesOfValueWith( -            SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue); +        ReplaceUses(SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), +                    InputGlue);        assert(NodeToMatch->use_empty() &&               "Didn't replace all uses of the node?"); @@ -3729,7 +3698,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,      // If the code reached this point, then the match failed.  See if there is      // another child to try in the current 'Scope', otherwise pop it until we      // find a case to check. 
-    DEBUG(dbgs() << "  Match failed at index " << CurrentOpcodeIndex << "\n"); +    LLVM_DEBUG(dbgs() << "  Match failed at index " << CurrentOpcodeIndex +                      << "\n");      ++NumDAGIselRetries;      while (true) {        if (MatchScopes.empty()) { @@ -3749,7 +3719,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,          MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);        MatcherIndex = LastScope.FailIndex; -      DEBUG(dbgs() << "  Continuing at " << MatcherIndex << "\n"); +      LLVM_DEBUG(dbgs() << "  Continuing at " << MatcherIndex << "\n");        InputChain = LastScope.InputChain;        InputGlue = LastScope.InputGlue; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index be4ab094bf49..3b19bff4743d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -229,7 +229,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet    if (level >= 20) {      if (!printed) {        printed = true; -      DEBUG(dbgs() << "setSubgraphColor hit max level\n"); +      LLVM_DEBUG(dbgs() << "setSubgraphColor hit max level\n");      }      return true;    } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 3f64b49e3555..5cf06e62b80c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -28,7 +28,6 @@  #include "llvm/CodeGen/MachineFrameInfo.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/RuntimeLibcalls.h"  #include "llvm/CodeGen/SelectionDAG.h"  #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -43,6 +42,7 @@  #include "llvm/IR/Statepoint.h"  #include "llvm/IR/Type.h"  #include "llvm/Support/Casting.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Target/TargetMachine.h"  #include "llvm/Target/TargetOptions.h"  #include <cassert> diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d76e52d78870..fa867fcec366 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -20,7 +20,6 @@  #include "llvm/CodeGen/MachineJumpTableInfo.h"  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/IR/DataLayout.h" @@ -32,6 +31,7 @@  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/KnownBits.h"  #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h"  #include "llvm/Target/TargetMachine.h"  #include <cctype>  using namespace llvm; @@ -96,7 +96,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,    return true;  } -/// \brief Set CallLoweringInfo attribute flags based on a call instruction +/// Set CallLoweringInfo attribute flags based on a call instruction  /// and called function attributes.  
void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,                                                       unsigned ArgIdx) { @@ -524,6 +524,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,    }    // Other users may use these bits. +  EVT VT = Op.getValueType();    if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {      if (Depth != 0) {        // If not at the root, Just compute the Known bits to @@ -537,7 +538,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,    } else if (DemandedMask == 0) {      // Not demanding any bits from Op.      if (!Op.isUndef()) -      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); +      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));      return false;    } else if (Depth == 6) {        // Limit search depth.      return false; @@ -580,7 +581,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,        KnownBits LHSKnown;        // Do not increment Depth here; that can cause an infinite loop.        TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth); -      // If the LHS already has zeros where RHSC does, this and is dead. +      // If the LHS already has zeros where RHSC does, this 'and' is dead.        if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask))          return TLO.CombineTo(Op, Op0); @@ -596,8 +597,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,        // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1        if (isBitwiseNot(Op0) && Op0.hasOneUse() &&            LHSKnown.One == ~RHSC->getAPIntValue()) { -        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(), -                                      Op0.getOperand(0), Op.getOperand(1)); +        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), +                                      Op.getOperand(1));          return TLO.CombineTo(Op, Xor);        }      } @@ -618,7 +619,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,        return TLO.CombineTo(Op, Op.getOperand(1));      // If all of the demanded bits in the inputs are known zeros, return zero.      if (NewMask.isSubsetOf(Known.Zero | Known2.Zero)) -      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); +      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));      // If the RHS is a constant, see if we can simplify it.      if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO))        return true; @@ -680,7 +681,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      // (but not both) turn this into an *inclusive* or.      //    e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0      if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0) -      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), +      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT,                                                 Op.getOperand(0),                                                 Op.getOperand(1))); @@ -696,7 +697,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      // NB: it is okay if more bits are known than are requested      if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side        if (Known.One == Known2.One) { // set bits are the same on both sides -        EVT VT = Op.getValueType();          SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT);          return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,                                                   Op.getOperand(0), ANDC)); @@ -710,7 +710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      if (C && !C->isAllOnesValue()) {        if (NewMask.isSubsetOf(C->getAPIntValue())) {          // We're flipping all demanded bits. Flip the undemanded bits too. -        SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), Op.getValueType()); +        SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), VT);          return TLO.CombineTo(Op, New);        }        // If we can't turn this into a 'not', try to shrink the constant. @@ -761,7 +761,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      // width as the setcc result, and (3) the result of a setcc conforms to 0 or      // -1, we may be able to bypass the setcc.      if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && -        getBooleanContents(Op.getValueType()) == +        getBooleanContents(VT) ==              BooleanContent::ZeroOrNegativeOneBooleanContent) {        // If we're testing X < 0, then this compare isn't needed - just use X!        
// FIXME: We're limiting to integer types here, but this should also work @@ -807,7 +807,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,                SDValue NewSA =                  TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); -              EVT VT = Op.getValueType();                return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,                                                         InOp.getOperand(0),                                                         NewSA)); @@ -835,8 +834,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,                              TLO.DAG.getConstant(ShAmt, dl, ShTy));            return              TLO.CombineTo(Op, -                          TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), -                                          NarrowShl)); +                          TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));          }          // Repeat the SHL optimization above in cases where an extension          // intervenes: (shl (anyext (shr x, c1)), c2) to @@ -854,7 +852,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,                SDValue NewSA =                  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,                                      Op.getOperand(1).getValueType()); -              EVT VT = Op.getValueType();                SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,                                                 InnerOp.getOperand(0));                return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, @@ -904,7 +901,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,                SDValue NewSA =                  TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); -              EVT VT = Op.getValueType();                return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,                                                         InOp.getOperand(0),                                                         NewSA)); @@ -930,12 +926,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      // the shift amount is >= the size of the datatype, which is undefined.      if (NewMask.isOneValue())        return TLO.CombineTo(Op, -                           TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), -                                           Op.getOperand(0), Op.getOperand(1))); +                           TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), +                                           Op.getOperand(1)));      if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { -      EVT VT = Op.getValueType(); -        // If the shift count is an invalid immediate, don't do anything.        if (SA->getAPIntValue().uge(BitWidth))          break; @@ -1000,14 +994,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,        if (!AlreadySignExtended) {          // Compute the correct shift amount type, which must be getShiftAmountTy          // for scalar types after legalization. 
-        EVT ShiftAmtTy = Op.getValueType(); +        EVT ShiftAmtTy = VT;          if (TLO.LegalTypes() && !ShiftAmtTy.isVector())            ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);          SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,                                                 ShiftAmtTy); -        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, -                                                 Op.getValueType(), InOp, +        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, InOp,                                                   ShiftAmt));        }      } @@ -1072,8 +1065,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      // If none of the top bits are demanded, convert this into an any_extend.      if (NewMask.getActiveBits() <= OperandBitWidth) -      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, -                                               Op.getValueType(), +      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,                                                 Op.getOperand(0)));      APInt InMask = NewMask.trunc(OperandBitWidth); @@ -1089,8 +1081,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      // If none of the top bits are demanded, convert this into an any_extend.      if (NewMask.getActiveBits() <= InBits) -      return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, -                                              Op.getValueType(), +      return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,                                                Op.getOperand(0)));      // Since some of the sign extended bits are demanded, we know that the sign @@ -1107,8 +1098,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,      // If the sign bit is known zero, convert this to a zero extend.      if (Known.isNonNegative()) -      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, -                                               Op.getValueType(), +      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT,                                                 Op.getOperand(0)));      break;    } @@ -1139,8 +1129,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,        case ISD::SRL:          // Shrink SRL by a constant if none of the high bits shifted in are          // demanded. -        if (TLO.LegalTypes() && -            !isTypeDesirableForOp(ISD::SRL, Op.getValueType())) +        if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))            // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is            // undesirable.            break; @@ -1150,8 +1139,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,          SDValue Shift = In.getOperand(1);          if (TLO.LegalTypes()) {            uint64_t ShVal = ShAmt->getZExtValue(); -          Shift = TLO.DAG.getConstant(ShVal, dl, -                                      getShiftAmountTy(Op.getValueType(), DL)); +          Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));          }          if (ShAmt->getZExtValue() < BitWidth) { @@ -1163,12 +1151,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,            if (!(HighBits & NewMask)) {              // None of the shifted in bits are needed.  Add a truncate of the              // shift input, then shift it. 
-            SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, -                                               Op.getValueType(), +            SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT,                                                 In.getOperand(0)); -            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, -                                                     Op.getValueType(), -                                                     NewTrunc, +            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc,                                                       Shift));            }          } @@ -1182,9 +1167,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,    case ISD::AssertZext: {      // AssertZext demands all of the high bits, plus any of the low bits      // demanded by its users. -    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); -    APInt InMask = APInt::getLowBitsSet(BitWidth, -                                        VT.getSizeInBits()); +    EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); +    APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());      if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,                               Known, TLO, Depth+1))        return true; @@ -1196,40 +1180,45 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,    case ISD::BITCAST:      // If this is an FP->Int bitcast and if the sign bit is the only      // thing demanded, turn this into a FGETSIGN. -    if (!TLO.LegalOperations() && -        !Op.getValueType().isVector() && +    if (!TLO.LegalOperations() && !VT.isVector() &&          !Op.getOperand(0).getValueType().isVector() &&          NewMask == APInt::getSignMask(Op.getValueSizeInBits()) &&          Op.getOperand(0).getValueType().isFloatingPoint()) { -      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); +      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);        bool i32Legal  = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); -      if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && +      if ((OpVTLegal || i32Legal) && VT.isSimple() && +           Op.getOperand(0).getValueType() != MVT::f16 &&             Op.getOperand(0).getValueType() != MVT::f128) {          // Cannot eliminate/lower SHL for f128 yet. -        EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; +        EVT Ty = OpVTLegal ? VT : MVT::i32;          // Make a FGETSIGN + SHL to move the sign bit into the appropriate          // place.  We expect the SHL to be eliminated by other optimizations.          SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));          unsigned OpVTSizeInBits = Op.getValueSizeInBits();          if (!OpVTLegal && OpVTSizeInBits > 32) -          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); +          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);          unsigned ShVal = Op.getValueSizeInBits() - 1; -        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType()); -        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, -                                                 Op.getValueType(), -                                                 Sign, ShAmt)); +        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT); +        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));        }      } +    // If this is a bitcast, let computeKnownBits handle it.  
Only do this on a
+    // recursive call where Known may be useful to the caller.
+    if (Depth > 0) {
+      TLO.DAG.computeKnownBits(Op, Known, Depth);
+      return false;
+    }
     break;
   case ISD::ADD:
   case ISD::MUL:
   case ISD::SUB: {
     // Add, Sub, and Mul don't demand any bits in positions beyond that
     // of the highest bit demanded of them.
-    APInt LoMask = APInt::getLowBitsSet(BitWidth,
-                                        BitWidth - NewMask.countLeadingZeros());
-    if (SimplifyDemandedBits(Op.getOperand(0), LoMask, Known2, TLO, Depth+1) ||
-        SimplifyDemandedBits(Op.getOperand(1), LoMask, Known2, TLO, Depth+1) ||
+    SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+    unsigned NewMaskLZ = NewMask.countLeadingZeros();
+    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMaskLZ);
+    if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) ||
+        SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) ||
         // See if the operation should be performed at a smaller bit width.
         ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) {
       SDNodeFlags Flags = Op.getNode()->getFlags();
@@ -1238,13 +1227,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
         // won't wrap after simplification.
         Flags.setNoSignedWrap(false);
         Flags.setNoUnsignedWrap(false);
-        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(),
-                                        Op.getOperand(0), Op.getOperand(1),
+        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
                                         Flags);
         return TLO.CombineTo(Op, NewOp);
       }
       return true;
     }
+
+    // If we have a constant operand, we may be able to turn it into -1 if we
+    // do not demand the high bits. This can make the constant smaller to
+    // encode, allow more general folding, or match specialized instruction
+    // patterns (e.g. 'blsr' on x86). Don't bother changing 1 to -1 because
+    // that is probably not useful (and could be detrimental).
+    ConstantSDNode *C = isConstOrConstSplat(Op1);
+    APInt HighMask = APInt::getHighBitsSet(NewMask.getBitWidth(), NewMaskLZ);
+    if (C && !C->isAllOnesValue() && !C->isOne() &&
+        (C->getAPIntValue() | HighMask).isAllOnesValue()) {
+      SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
+      // We can't guarantee that the new math op doesn't wrap, so explicitly
+      // clear those flags to prevent folding with a potential existing node
+      // that has those flags set.
+      SDNodeFlags Flags; +      Flags.setNoSignedWrap(false); +      Flags.setNoUnsignedWrap(false); +      SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags); +      return TLO.CombineTo(Op, NewOp); +    } +      LLVM_FALLTHROUGH;    }    default: @@ -1265,10 +1274,384 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,          if (C->isOpaque())            return false;      } -    return TLO.CombineTo(Op, -                         TLO.DAG.getConstant(Known.One, dl, Op.getValueType())); +    return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT)); +  } + +  return false; +} + +bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op, +                                                const APInt &DemandedElts, +                                                APInt &KnownUndef, +                                                APInt &KnownZero, +                                                DAGCombinerInfo &DCI) const { +  SelectionDAG &DAG = DCI.DAG; +  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), +                        !DCI.isBeforeLegalizeOps()); + +  bool Simplified = +      SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO); +  if (Simplified) +    DCI.CommitTargetLoweringOpt(TLO); +  return Simplified; +} + +bool TargetLowering::SimplifyDemandedVectorElts( +    SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, +    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth, +    bool AssumeSingleUse) const { +  EVT VT = Op.getValueType(); +  APInt DemandedElts = DemandedEltMask; +  unsigned NumElts = DemandedElts.getBitWidth(); +  assert(VT.isVector() && "Expected vector op"); +  assert(VT.getVectorNumElements() == NumElts && +         "Mask size mismatches value type element count!"); + +  KnownUndef = KnownZero = APInt::getNullValue(NumElts); + +  // Undef operand. +  if (Op.isUndef()) { +    KnownUndef.setAllBits(); +    return false; +  } + +  // If Op has other users, assume that all elements are needed. +  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) +    DemandedElts.setAllBits(); + +  // Not demanding any elements from Op. +  if (DemandedElts == 0) { +    KnownUndef.setAllBits(); +    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); +  } + +  // Limit search depth. +  if (Depth >= 6) +    return false; + +  SDLoc DL(Op); +  unsigned EltSizeInBits = VT.getScalarSizeInBits(); + +  switch (Op.getOpcode()) { +  case ISD::SCALAR_TO_VECTOR: { +    if (!DemandedElts[0]) { +      KnownUndef.setAllBits(); +      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); +    } +    KnownUndef.setHighBits(NumElts - 1); +    break; +  } +  case ISD::BITCAST: { +    SDValue Src = Op.getOperand(0); +    EVT SrcVT = Src.getValueType(); + +    // We only handle vectors here. +    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits? +    if (!SrcVT.isVector()) +      break; + +    // Fast handling of 'identity' bitcasts. +    unsigned NumSrcElts = SrcVT.getVectorNumElements(); +    if (NumSrcElts == NumElts) +      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, +                                        KnownZero, TLO, Depth + 1); + +    APInt SrcZero, SrcUndef; +    APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts); + +    // Bitcast from 'large element' src vector to 'small element' vector, we +    // must demand a source element if any DemandedElt maps to it. 
+    if ((NumElts % NumSrcElts) == 0) {
+      unsigned Scale = NumElts / NumSrcElts;
+      for (unsigned i = 0; i != NumElts; ++i)
+        if (DemandedElts[i])
+          SrcDemandedElts.setBit(i / Scale);
+
+      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+                                     TLO, Depth + 1))
+        return true;
+
+      // If the src element is zero/undef then all of its output elements will
+      // be too - only demanded elements are guaranteed to be correct.
+      for (unsigned i = 0; i != NumSrcElts; ++i) {
+        if (SrcDemandedElts[i]) {
+          if (SrcZero[i])
+            KnownZero.setBits(i * Scale, (i + 1) * Scale);
+          if (SrcUndef[i])
+            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
+        }
+      }
+    }
+
+    // Bitcast from 'small element' src vector to 'large element' vector: we
+    // demand all smaller source elements covered by the larger demanded
+    // element of this vector.
+    if ((NumSrcElts % NumElts) == 0) {
+      unsigned Scale = NumSrcElts / NumElts;
+      for (unsigned i = 0; i != NumElts; ++i)
+        if (DemandedElts[i])
+          SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
+
+      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
+                                     TLO, Depth + 1))
+        return true;
+
+      // If all the src elements covering an output element are zero/undef, then
+      // the output element will be as well, assuming it was demanded.
+      for (unsigned i = 0; i != NumElts; ++i) {
+        if (DemandedElts[i]) {
+          if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
+            KnownZero.setBit(i);
+          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
+            KnownUndef.setBit(i);
+        }
+      }
+    }
+    break;
+  }
+  case ISD::BUILD_VECTOR: {
+    // Check all elements and simplify any unused elements with UNDEF.
+    if (!DemandedElts.isAllOnesValue()) {
+      // Don't simplify BROADCASTS.
+      if (llvm::any_of(Op->op_values(), +                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) { +        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end()); +        bool Updated = false; +        for (unsigned i = 0; i != NumElts; ++i) { +          if (!DemandedElts[i] && !Ops[i].isUndef()) { +            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType()); +            KnownUndef.setBit(i); +            Updated = true; +          } +        } +        if (Updated) +          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops)); +      } +    } +    for (unsigned i = 0; i != NumElts; ++i) { +      SDValue SrcOp = Op.getOperand(i); +      if (SrcOp.isUndef()) { +        KnownUndef.setBit(i); +      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() && +                 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) { +        KnownZero.setBit(i); +      } +    } +    break; +  } +  case ISD::CONCAT_VECTORS: { +    EVT SubVT = Op.getOperand(0).getValueType(); +    unsigned NumSubVecs = Op.getNumOperands(); +    unsigned NumSubElts = SubVT.getVectorNumElements(); +    for (unsigned i = 0; i != NumSubVecs; ++i) { +      SDValue SubOp = Op.getOperand(i); +      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts); +      APInt SubUndef, SubZero; +      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO, +                                     Depth + 1)) +        return true; +      KnownUndef.insertBits(SubUndef, i * NumSubElts); +      KnownZero.insertBits(SubZero, i * NumSubElts); +    } +    break; +  } +  case ISD::INSERT_SUBVECTOR: { +    if (!isa<ConstantSDNode>(Op.getOperand(2))) +      break; +    SDValue Base = Op.getOperand(0); +    SDValue Sub = Op.getOperand(1); +    EVT SubVT = Sub.getValueType(); +    unsigned NumSubElts = SubVT.getVectorNumElements(); +    const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue(); +    if (Idx.uge(NumElts - NumSubElts)) +      break; +    unsigned SubIdx = Idx.getZExtValue(); +    APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); +    APInt SubUndef, SubZero; +    if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO, +                                   Depth + 1)) +      return true; +    APInt BaseElts = DemandedElts; +    BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); +    if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO, +                                   Depth + 1)) +      return true; +    KnownUndef.insertBits(SubUndef, SubIdx); +    KnownZero.insertBits(SubZero, SubIdx); +    break; +  } +  case ISD::EXTRACT_SUBVECTOR: { +    if (!isa<ConstantSDNode>(Op.getOperand(1))) +      break; +    SDValue Src = Op.getOperand(0); +    unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); +    const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue(); +    if (Idx.uge(NumSrcElts - NumElts)) +      break; +    // Offset the demanded elts by the subvector index. 
+    uint64_t SubIdx = Idx.getZExtValue();
+    APInt SrcElts = DemandedElts.zext(NumSrcElts).shl(SubIdx);
+    APInt SrcUndef, SrcZero;
+    if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
+                                   Depth + 1))
+      return true;
+    KnownUndef = SrcUndef.extractBits(NumElts, SubIdx);
+    KnownZero = SrcZero.extractBits(NumElts, SubIdx);
+    break;
+  }
+  case ISD::INSERT_VECTOR_ELT: {
+    SDValue Vec = Op.getOperand(0);
+    SDValue Scl = Op.getOperand(1);
+    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+
+    // For a legal, constant insertion index, if we don't need this insertion
+    // then strip it, else remove it from the demanded elts.
+    if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
+      unsigned Idx = CIdx->getZExtValue();
+      if (!DemandedElts[Idx])
+        return TLO.CombineTo(Op, Vec);
+      DemandedElts.clearBit(Idx);
+
+      if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef,
+                                     KnownZero, TLO, Depth + 1))
+        return true;
+
+      KnownUndef.clearBit(Idx);
+      if (Scl.isUndef())
+        KnownUndef.setBit(Idx);
+
+      KnownZero.clearBit(Idx);
+      if (isNullConstant(Scl) || isNullFPConstant(Scl))
+        KnownZero.setBit(Idx);
+      break;
+    }
+
+    APInt VecUndef, VecZero;
+    if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
+                                   Depth + 1))
+      return true;
+    // Without knowing the insertion index we can't set KnownUndef/KnownZero.
+    break;
+  }
+  case ISD::VSELECT: {
+    APInt DemandedLHS(DemandedElts);
+    APInt DemandedRHS(DemandedElts);
+
+    // TODO - add support for constant vselect masks.
+
+    // See if we can simplify either vselect operand.
+    APInt UndefLHS, ZeroLHS;
+    APInt UndefRHS, ZeroRHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
+                                   ZeroLHS, TLO, Depth + 1))
+      return true;
+    if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
+                                   ZeroRHS, TLO, Depth + 1))
+      return true;
+
+    KnownUndef = UndefLHS & UndefRHS;
+    KnownZero = ZeroLHS & ZeroRHS;
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+    // Collect demanded elements from shuffle operands.
+    APInt DemandedLHS(NumElts, 0);
+    APInt DemandedRHS(NumElts, 0);
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int M = ShuffleMask[i];
+      if (M < 0 || !DemandedElts[i])
+        continue;
+      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
+      if (M < (int)NumElts)
+        DemandedLHS.setBit(M);
+      else
+        DemandedRHS.setBit(M - NumElts);
+    }
+
+    // See if we can simplify either shuffle operand.
+    APInt UndefLHS, ZeroLHS;
+    APInt UndefRHS, ZeroRHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
+                                   ZeroLHS, TLO, Depth + 1))
+      return true;
+    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
+                                   ZeroRHS, TLO, Depth + 1))
+      return true;
+
+    // Simplify mask using undef elements from LHS/RHS.
+    bool Updated = false; +    bool IdentityLHS = true, IdentityRHS = true; +    SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end()); +    for (unsigned i = 0; i != NumElts; ++i) { +      int &M = NewMask[i]; +      if (M < 0) +        continue; +      if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) || +          (M >= (int)NumElts && UndefRHS[M - NumElts])) { +        Updated = true; +        M = -1; +      } +      IdentityLHS &= (M < 0) || (M == (int)i); +      IdentityRHS &= (M < 0) || ((M - NumElts) == i); +    } + +    // Update legal shuffle masks based on demanded elements if it won't reduce +    // to Identity which can cause premature removal of the shuffle mask. +    if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps && +        isShuffleMaskLegal(NewMask, VT)) +      return TLO.CombineTo(Op, +                           TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0), +                                                    Op.getOperand(1), NewMask)); + +    // Propagate undef/zero elements from LHS/RHS. +    for (unsigned i = 0; i != NumElts; ++i) { +      int M = ShuffleMask[i]; +      if (M < 0) { +        KnownUndef.setBit(i); +      } else if (M < (int)NumElts) { +        if (UndefLHS[M]) +          KnownUndef.setBit(i); +        if (ZeroLHS[M]) +          KnownZero.setBit(i); +      } else { +        if (UndefRHS[M - NumElts]) +          KnownUndef.setBit(i); +        if (ZeroRHS[M - NumElts]) +          KnownZero.setBit(i); +      } +    } +    break; +  } +  case ISD::ADD: +  case ISD::SUB: { +    APInt SrcUndef, SrcZero; +    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, +                                   SrcZero, TLO, Depth + 1)) +      return true; +    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, +                                   KnownZero, TLO, Depth + 1)) +      return true; +    KnownZero &= SrcZero; +    KnownUndef &= SrcUndef; +    break; +  } +  case ISD::TRUNCATE: +    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, +                                   KnownZero, TLO, Depth + 1)) +      return true; +    break; +  default: { +    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) +      if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef, +                                                  KnownZero, TLO, Depth)) +        return true; +    break; +  }    } +  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");    return false;  } @@ -1316,6 +1699,18 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,    return 1;  } +bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode( +    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, +    TargetLoweringOpt &TLO, unsigned Depth) const { +  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || +          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || +          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || +          Op.getOpcode() == ISD::INTRINSIC_VOID) && +         "Should use SimplifyDemandedVectorElts if you don't know whether Op" +         " is a target node!"); +  return false; +} +  // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must  // work with truncating build vectors and vectors with elements of less than  // 8 bits. 
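// A sketch of how a backend could plug into the recursion via the new
// callback; MyTargetLowering and MyTargetISD::FOO are placeholders for
// illustration, not part of any in-tree target:

bool MyTargetLowering::SimplifyDemandedVectorEltsForTargetNode(
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth) const {
  // FOO is assumed to pass vector elements through its first operand, so
  // the same demanded mask can simply be pushed down one level.
  if (Op.getOpcode() == MyTargetISD::FOO)
    return SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts,
                                      KnownUndef, KnownZero, TLO, Depth + 1);
  return false;
}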
@@ -1353,16 +1748,6 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {    llvm_unreachable("Invalid boolean contents");  } -SDValue TargetLowering::getConstTrueVal(SelectionDAG &DAG, EVT VT, -                                        const SDLoc &DL) const { -  unsigned ElementWidth = VT.getScalarSizeInBits(); -  APInt TrueInt = -      getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent -          ? APInt(ElementWidth, 1) -          : APInt::getAllOnesValue(ElementWidth); -  return DAG.getConstant(TrueInt, DL, VT); -} -  bool TargetLowering::isConstFalseVal(const SDNode *N) const {    if (!N)      return false; @@ -1466,6 +1851,89 @@ SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,    return SDValue();  } +/// There are multiple IR patterns that could be checking whether certain +/// truncation of a signed number would be lossy or not. The pattern which is +/// best at IR level, may not lower optimally. Thus, we want to unfold it. +/// We are looking for the following pattern: (KeptBits is a constant) +///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) +/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false. +/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0 +/// We will unfold it into the natural trunc+sext pattern: +///   ((%x << C) a>> C) dstcond %x +/// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x) +SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( +    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI, +    const SDLoc &DL) const { +  // We must be comparing with a constant. +  ConstantSDNode *C1; +  if (!(C1 = dyn_cast<ConstantSDNode>(N1))) +    return SDValue(); + +  // N0 should be:  add %x, (1 << (KeptBits-1)) +  if (N0->getOpcode() != ISD::ADD) +    return SDValue(); + +  // And we must be 'add'ing a constant. +  ConstantSDNode *C01; +  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1)))) +    return SDValue(); + +  SDValue X = N0->getOperand(0); +  EVT XVT = X.getValueType(); + +  // Validate constants ... + +  APInt I1 = C1->getAPIntValue(); + +  ISD::CondCode NewCond; +  if (Cond == ISD::CondCode::SETULT) { +    NewCond = ISD::CondCode::SETEQ; +  } else if (Cond == ISD::CondCode::SETULE) { +    NewCond = ISD::CondCode::SETEQ; +    // But need to 'canonicalize' the constant. +    I1 += 1; +  } else if (Cond == ISD::CondCode::SETUGT) { +    NewCond = ISD::CondCode::SETNE; +    // But need to 'canonicalize' the constant. +    I1 += 1; +  } else if (Cond == ISD::CondCode::SETUGE) { +    NewCond = ISD::CondCode::SETNE; +  } else +    return SDValue(); + +  const APInt &I01 = C01->getAPIntValue(); +  // Both of them must be power-of-two, and the constant from setcc is bigger. +  if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2())) +    return SDValue(); + +  // They are power-of-two, so which bit is set? +  const unsigned KeptBits = I1.logBase2(); +  const unsigned KeptBitsMinusOne = I01.logBase2(); + +  // Magic! +  if (KeptBits != (KeptBitsMinusOne + 1)) +    return SDValue(); +  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable"); + +  // We don't want to do this in every single case. 
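// Concrete instance (i32 %x, KeptBits == 8): the source pattern
//   (add %x, 128) ult 256
// holds exactly when %x is in [-128, 127], i.e. when truncating to i8 and
// sign-extending back round-trips. With C = 32 - 8 = 24 it unfolds to
//   ((%x << 24) ashr 24) eq %x
// The profitability hook below lets each target keep whichever form it
// lowers better.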
+  SelectionDAG &DAG = DCI.DAG; +  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck( +          XVT, KeptBits)) +    return SDValue(); + +  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits; +  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable"); + +  // Unfold into:  ((%x << C) a>> C) cond %x +  // Where 'cond' will be either 'eq' or 'ne'. +  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT); +  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt); +  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt); +  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond); + +  return T2; +} +  /// Try to simplify a setcc built with the specified operands and cc. If it is  /// unable to simplify it, return a null SDValue.  SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -1473,25 +1941,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,                                        DAGCombinerInfo &DCI,                                        const SDLoc &dl) const {    SelectionDAG &DAG = DCI.DAG; +  EVT OpVT = N0.getValueType();    // These setcc operations always fold.    switch (Cond) {    default: break;    case ISD::SETFALSE: -  case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT); +  case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT);    case ISD::SETTRUE: -  case ISD::SETTRUE2: { -    TargetLowering::BooleanContent Cnt = -        getBooleanContents(N0->getValueType(0)); -    return DAG.getConstant( -        Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, -        VT); -  } +  case ISD::SETTRUE2:  return DAG.getBoolConstant(true, dl, VT, OpVT);    }    // Ensure that the constant occurs on the RHS and fold constant comparisons. +  // TODO: Handle non-splat vector constants. All undef causes trouble.    ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); -  if (isa<ConstantSDNode>(N0.getNode()) && +  if (isConstOrConstSplat(N0) &&        (DCI.isBeforeLegalizeOps() ||         isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))      return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); @@ -1737,7 +2201,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,          EVT newVT = N0.getOperand(0).getValueType();          if (DCI.isBeforeLegalizeOps() ||              (isOperationLegal(ISD::SETCC, newVT) && -             getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { +             isCondCodeLegal(Cond, newVT.getSimpleVT()))) {            EVT NewSetCCVT =                getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);            SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); @@ -1867,8 +2331,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,        }      } +    if (SDValue V = +            optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) +      return V; +  } + +  // These simplifications apply to splat vectors as well. +  // TODO: Handle more splat vector cases. 
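// Note getBoolConstant (used throughout this block) materializes "true"
// according to the operand type's boolean contents -- e.g. a splat of
// all-ones for vector targets using ZeroOrNegativeOneBooleanContent --
// which is what makes these folds safe for splat comparisons such as
//   setcc <4 x i32> %x, <splat of MAX>, setle  -->  all-true vector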
+  if (auto *N1C = isConstOrConstSplat(N1)) { +    const APInt &C1 = N1C->getAPIntValue(); +      APInt MinVal, MaxVal; -    unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); +    unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();      if (ISD::isSignedIntSetCC(Cond)) {        MinVal = APInt::getSignedMinValue(OperandBitSize);        MaxVal = APInt::getSignedMaxValue(OperandBitSize); @@ -1881,84 +2355,105 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,      if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {        // X >= MIN --> true        if (C1 == MinVal) -        return DAG.getConstant(1, dl, VT); - -      // X >= C0 --> X > (C0 - 1) -      APInt C = C1 - 1; -      ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; -      if ((DCI.isBeforeLegalizeOps() || -           isCondCodeLegal(NewCC, VT.getSimpleVT())) && -          (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && -                                isLegalICmpImmediate(C.getSExtValue())))) { -        return DAG.getSetCC(dl, VT, N0, -                            DAG.getConstant(C, dl, N1.getValueType()), -                            NewCC); +        return DAG.getBoolConstant(true, dl, VT, OpVT); + +      if (!VT.isVector()) { // TODO: Support this for vectors. +        // X >= C0 --> X > (C0 - 1) +        APInt C = C1 - 1; +        ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; +        if ((DCI.isBeforeLegalizeOps() || +             isCondCodeLegal(NewCC, VT.getSimpleVT())) && +            (!N1C->isOpaque() || (C.getBitWidth() <= 64 && +                                  isLegalICmpImmediate(C.getSExtValue())))) { +          return DAG.getSetCC(dl, VT, N0, +                              DAG.getConstant(C, dl, N1.getValueType()), +                              NewCC); +        }        }      }      if (Cond == ISD::SETLE || Cond == ISD::SETULE) {        // X <= MAX --> true        if (C1 == MaxVal) -          return DAG.getConstant(1, dl, VT); +        return DAG.getBoolConstant(true, dl, VT, OpVT);        // X <= C0 --> X < (C0 + 1) -      APInt C = C1 + 1; -      ISD::CondCode NewCC = (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT; -      if ((DCI.isBeforeLegalizeOps() || -           isCondCodeLegal(NewCC, VT.getSimpleVT())) && -          (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && -                                isLegalICmpImmediate(C.getSExtValue())))) { -        return DAG.getSetCC(dl, VT, N0, -                            DAG.getConstant(C, dl, N1.getValueType()), -                            NewCC); -      } -    } - -    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) -      return DAG.getConstant(0, dl, VT);      // X < MIN --> false -    if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) -      return DAG.getConstant(1, dl, VT);      // X >= MIN --> true -    if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) -      return DAG.getConstant(0, dl, VT);      // X > MAX --> false -    if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) -      return DAG.getConstant(1, dl, VT);      // X <= MAX --> true - -    // Canonicalize setgt X, Min --> setne X, Min -    if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) -      return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); -    // Canonicalize setlt X, Max --> setne X, Max -    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) -      return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - -    // If we have setult X, 1, turn it into seteq X, 0 -    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) -      return DAG.getSetCC(dl, VT, N0, -                          DAG.getConstant(MinVal, dl, N0.getValueType()), -                          ISD::SETEQ); -    // If we have setugt X, Max-1, turn it into seteq X, Max -    if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) -      return DAG.getSetCC(dl, VT, N0, -                          DAG.getConstant(MaxVal, dl, N0.getValueType()), -                          ISD::SETEQ); +      if (!VT.isVector()) { // TODO: Support this for vectors. +        APInt C = C1 + 1; +        ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; +        if ((DCI.isBeforeLegalizeOps() || +             isCondCodeLegal(NewCC, VT.getSimpleVT())) && +            (!N1C->isOpaque() || (C.getBitWidth() <= 64 && +                                  isLegalICmpImmediate(C.getSExtValue())))) { +          return DAG.getSetCC(dl, VT, N0, +                              DAG.getConstant(C, dl, N1.getValueType()), +                              NewCC); +        } +      } +    } -    // If we have "setcc X, C0", check to see if we can shrink the immediate -    // by changing cc. +    if (Cond == ISD::SETLT || Cond == ISD::SETULT) { +      if (C1 == MinVal) +        return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false + +      // TODO: Support this for vectors after legalize ops. 
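// E.g. for unsigned i8, setult X, 1 (MinVal+1) becomes seteq X, 0; for
// signed i8, setlt X, 127 (MaxVal) becomes setne X, 127. The eq/ne forms
// are easier for later combines and most ISAs to handle.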
+      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { +        // Canonicalize setlt X, Max --> setne X, Max +        if (C1 == MaxVal) +          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + +        // If we have setult X, 1, turn it into seteq X, 0 +        if (C1 == MinVal+1) +          return DAG.getSetCC(dl, VT, N0, +                              DAG.getConstant(MinVal, dl, N0.getValueType()), +                              ISD::SETEQ); +      } +    } -    // SETUGT X, SINTMAX  -> SETLT X, 0 -    if (Cond == ISD::SETUGT && -        C1 == APInt::getSignedMaxValue(OperandBitSize)) -      return DAG.getSetCC(dl, VT, N0, -                          DAG.getConstant(0, dl, N1.getValueType()), -                          ISD::SETLT); +    if (Cond == ISD::SETGT || Cond == ISD::SETUGT) { +      if (C1 == MaxVal) +        return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false + +      // TODO: Support this for vectors after legalize ops. +      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { +        // Canonicalize setgt X, Min --> setne X, Min +        if (C1 == MinVal) +          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + +        // If we have setugt X, Max-1, turn it into seteq X, Max +        if (C1 == MaxVal-1) +          return DAG.getSetCC(dl, VT, N0, +                              DAG.getConstant(MaxVal, dl, N0.getValueType()), +                              ISD::SETEQ); +      } +    } -    // SETULT X, SINTMIN  -> SETGT X, -1 -    if (Cond == ISD::SETULT && -        C1 == APInt::getSignedMinValue(OperandBitSize)) { -      SDValue ConstMinusOne = -          DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, -                          N1.getValueType()); -      return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); +    // If we have "setcc X, C0", check to see if we can shrink the immediate +    // by changing cc. +    // TODO: Support this for vectors after legalize ops. +    if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { +      // SETUGT X, SINTMAX  -> SETLT X, 0 +      if (Cond == ISD::SETUGT && +          C1 == APInt::getSignedMaxValue(OperandBitSize)) +        return DAG.getSetCC(dl, VT, N0, +                            DAG.getConstant(0, dl, N1.getValueType()), +                            ISD::SETLT); + +      // SETULT X, SINTMIN  -> SETGT X, -1 +      if (Cond == ISD::SETULT && +          C1 == APInt::getSignedMinValue(OperandBitSize)) { +        SDValue ConstMinusOne = +            DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, +                            N1.getValueType()); +        return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); +      }      } +  } + +  // Back to non-vector simplifications. +  // TODO: Can we do these for vector splats? +  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { +    const APInt &C1 = N1C->getAPIntValue();      // Fold bit comparisons when we can.      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && @@ -1967,9 +2462,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,          N0.getOpcode() == ISD::AND) {        auto &DL = DAG.getDataLayout();        if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { -        EVT ShiftTy = DCI.isBeforeLegalize() -                          ? 
getPointerTy(DL) -                          : getShiftAmountTy(N0.getValueType(), DL); +        EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, +                                       !DCI.isBeforeLegalize());          if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3            // Perform the xform if the AND RHS is a single bit.            if (AndRHS->getAPIntValue().isPowerOf2()) { @@ -2001,9 +2495,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,            if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {              unsigned ShiftBits = AndRHSC.countTrailingZeros();              auto &DL = DAG.getDataLayout(); -            EVT ShiftTy = DCI.isBeforeLegalize() -                              ? getPointerTy(DL) -                              : getShiftAmountTy(N0.getValueType(), DL); +            EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, +                                           !DCI.isBeforeLegalize());              EVT CmpTy = N0.getValueType();              SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),                                          DAG.getConstant(ShiftBits, dl, @@ -2033,9 +2526,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,          if (ShiftBits && NewC.getMinSignedBits() <= 64 &&            isLegalICmpImmediate(NewC.getSExtValue())) {            auto &DL = DAG.getDataLayout(); -          EVT ShiftTy = DCI.isBeforeLegalize() -                            ? getPointerTy(DL) -                            : getShiftAmountTy(N0.getValueType(), DL); +          EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, +                                         !DCI.isBeforeLegalize());            EVT CmpTy = N0.getValueType();            SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,                                        DAG.getConstant(ShiftBits, dl, ShiftTy)); @@ -2058,9 +2550,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,        switch (ISD::getUnorderedFlavor(Cond)) {        default: llvm_unreachable("Unknown flavor!");        case 0:  // Known false. -        return DAG.getConstant(0, dl, VT); +        return DAG.getBoolConstant(false, dl, VT, OpVT);        case 1:  // Known true. -        return DAG.getConstant(1, dl, VT); +        return DAG.getBoolConstant(true, dl, VT, OpVT);        case 2:  // Undefined.          return DAG.getUNDEF(VT);        } @@ -2124,31 +2616,24 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,    if (N0 == N1) {      // The sext(setcc()) => setcc() optimization relies on the appropriate      // constant being emitted. -    uint64_t EqVal = 0; -    switch (getBooleanContents(N0.getValueType())) { -    case UndefinedBooleanContent: -    case ZeroOrOneBooleanContent: -      EqVal = ISD::isTrueWhenEqual(Cond); -      break; -    case ZeroOrNegativeOneBooleanContent: -      EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0; -      break; -    } + +    bool EqTrue = ISD::isTrueWhenEqual(Cond);      // We can always fold X == X for integer setcc's. -    if (N0.getValueType().isInteger()) { -      return DAG.getConstant(EqVal, dl, VT); -    } +    if (N0.getValueType().isInteger()) +      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); +      unsigned UOF = ISD::getUnorderedFlavor(Cond);      if (UOF == 2)   // FP operators that are undefined on NaNs. 
-      return DAG.getConstant(EqVal, dl, VT); -    if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) -      return DAG.getConstant(EqVal, dl, VT); +      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); +    if (UOF == unsigned(EqTrue)) +      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);      // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO      // if it is not already.      ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; -    if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || -          getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal)) +    if (NewCond != Cond && +        (DCI.isBeforeLegalizeOps() || +         isCondCodeLegal(NewCond, N0.getSimpleValueType())))        return DAG.getSetCC(dl, VT, N0, N1, NewCond);    } @@ -2237,7 +2722,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,              SDValue SH = DAG.getNode(                  ISD::SHL, dl, N1.getValueType(), N1,                  DAG.getConstant(1, dl, -                                getShiftAmountTy(N1.getValueType(), DL))); +                                getShiftAmountTy(N1.getValueType(), DL, +                                                 !DCI.isBeforeLegalize())));              if (!DCI.isCalledByLegalizer())                DCI.AddToWorklist(SH.getNode());              return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); @@ -2262,7 +2748,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,            // X == (Z-X)  --> X<<1 == Z            SDValue SH = DAG.getNode(                ISD::SHL, dl, N1.getValueType(), N0, -              DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL))); +              DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL, +                                                      !DCI.isBeforeLegalize())));            if (!DCI.isCalledByLegalizer())              DCI.AddToWorklist(SH.getNode());            return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); @@ -2276,50 +2763,52 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,    // Fold away ALL boolean setcc's.    
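// With a single bit per lane the comparisons collapse to logic: e.g.
// X <u Y is only true for X==0, Y==1, i.e. ~X & Y; reading the bit as a
// sign makes 1 mean -1, so signed greater-than coincides with unsigned
// less-than and shares the same lowering below.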
SDValue Temp; -  if (N0.getValueType() == MVT::i1 && foldBooleans) { +  if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) { +    EVT OpVT = N0.getValueType();      switch (Cond) {      default: llvm_unreachable("Unknown integer setcc!");      case ISD::SETEQ:  // X == Y  -> ~(X^Y) -      Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); -      N0 = DAG.getNOT(dl, Temp, MVT::i1); +      Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); +      N0 = DAG.getNOT(dl, Temp, OpVT);        if (!DCI.isCalledByLegalizer())          DCI.AddToWorklist(Temp.getNode());        break;      case ISD::SETNE:  // X != Y   -->  (X^Y) -      N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); +      N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);        break;      case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y      case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y -      Temp = DAG.getNOT(dl, N0, MVT::i1); -      N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp); +      Temp = DAG.getNOT(dl, N0, OpVT); +      N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);        if (!DCI.isCalledByLegalizer())          DCI.AddToWorklist(Temp.getNode());        break;      case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X      case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X -      Temp = DAG.getNOT(dl, N1, MVT::i1); -      N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp); +      Temp = DAG.getNOT(dl, N1, OpVT); +      N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);        if (!DCI.isCalledByLegalizer())          DCI.AddToWorklist(Temp.getNode());        break;      case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y      case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y -      Temp = DAG.getNOT(dl, N0, MVT::i1); -      N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp); +      Temp = DAG.getNOT(dl, N0, OpVT); +      N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);        if (!DCI.isCalledByLegalizer())          DCI.AddToWorklist(Temp.getNode());        break;      case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X      case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X -      Temp = DAG.getNOT(dl, N1, MVT::i1); -      N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp); +      Temp = DAG.getNOT(dl, N1, OpVT); +      N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);        break;      } -    if (VT != MVT::i1) { +    if (VT.getScalarType() != MVT::i1) {        if (!DCI.isCalledByLegalizer())          DCI.AddToWorklist(N0.getNode());        // FIXME: If running after legalize, we probably can't do this. -      N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0); +      ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT)); +      N0 = DAG.getNode(ExtendCode, dl, VT, N0);      }      return N0;    } @@ -2928,7 +3417,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,    }  } -/// \brief Given an exact SDIV by a constant, create a multiplication +/// Given an exact SDIV by a constant, create a multiplication  /// with the multiplicative inverse of the constant.  
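/// For example, an exact i32 divide by 6 shifts out the known-zero low bit
/// (sra by 1), then multiplies by the inverse of 3 modulo 2^32:
///   3 * 0xAAAAAAAB == 0x200000001 == 1 (mod 2^32)
/// so (x exact/ 6) == ((x >> 1) * 0xAAAAAAAB) in 32-bit arithmetic.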
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,                                const SDLoc &dl, SelectionDAG &DAG, @@ -2970,7 +3459,7 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,    return SDValue();  } -/// \brief Given an ISD::SDIV node expressing a divide by constant, +/// Given an ISD::SDIV node expressing a divide by constant,  /// return a DAG expression to select that will generate the same value by  /// multiplying by a magic number.  /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". @@ -3034,7 +3523,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,    return DAG.getNode(ISD::ADD, dl, VT, Q, T);  } -/// \brief Given an ISD::UDIV node expressing a divide by constant, +/// Given an ISD::UDIV node expressing a divide by constant,  /// return a DAG expression to select that will generate the same value by  /// multiplying by a magic number.  /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". @@ -3413,9 +3902,6 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,    return DAG.getMergeValues({ Value, NewChain }, SL);  } -// FIXME: This relies on each element having a byte size, otherwise the stride -// is 0 and just overwrites the same location. ExpandStore currently expects -// this broken behavior.  SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,                                               SelectionDAG &DAG) const {    SDLoc SL(ST); @@ -3432,11 +3918,43 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,    // The type of data as saved in memory.    EVT MemSclVT = StVT.getScalarType(); -  // Store Stride in bytes -  unsigned Stride = MemSclVT.getSizeInBits() / 8;    EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());    unsigned NumElem = StVT.getVectorNumElements(); +  // A vector must always be stored in memory as-is, i.e. without any padding +  // between the elements, since various code depend on it, e.g. in the +  // handling of a bitcast of a vector type to int, which may be done with a +  // vector store followed by an integer load. A vector that does not have +  // elements that are byte-sized must therefore be stored as an integer +  // built out of the extracted vector elements. +  if (!MemSclVT.isByteSized()) { +    unsigned NumBits = StVT.getSizeInBits(); +    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + +    SDValue CurrVal = DAG.getConstant(0, SL, IntVT); + +    for (unsigned Idx = 0; Idx < NumElem; ++Idx) { +      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, +                                DAG.getConstant(Idx, SL, IdxVT)); +      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt); +      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc); +      unsigned ShiftIntoIdx = +          (DAG.getDataLayout().isBigEndian() ? 
(NumElem - 1) - Idx : Idx); +      SDValue ShiftAmount = +          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT); +      SDValue ShiftedElt = +          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount); +      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt); +    } + +    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(), +                        ST->getAlignment(), ST->getMemOperand()->getFlags(), +                        ST->getAAInfo()); +  } + +  // Store Stride in bytes +  unsigned Stride = MemSclVT.getSizeInBits() / 8; +  assert (Stride && "Zero stride!");    // Extract each of the elements from the original vector and save them into    // memory individually.    SmallVector<SDValue, 8> Stores; @@ -3475,6 +3993,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {        if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) {          // Scalarize the load and let the individual components be handled.          SDValue Scalarized = scalarizeVectorLoad(LD, DAG); +        if (Scalarized->getOpcode() == ISD::MERGE_VALUES) +	  return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));          return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));        } diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp index b35bf6ba3a7b..d3454ca6ba6a 100644 --- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -53,6 +53,7 @@  #include "llvm/ADT/SetVector.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CFG.h"  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"  #include "llvm/CodeGen/MachineDominators.h" @@ -62,11 +63,13 @@  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineLoopInfo.h"  #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"  #include "llvm/CodeGen/MachinePostDominators.h"  #include "llvm/CodeGen/RegisterClassInfo.h"  #include "llvm/CodeGen/RegisterScavenging.h"  #include "llvm/CodeGen/TargetFrameLowering.h"  #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/IR/Attributes.h" @@ -97,7 +100,7 @@ EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,  namespace { -/// \brief Class to determine where the safe point to insert the +/// Class to determine where the safe point to insert the  /// prologue and epilogue are.  /// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the  /// shrink-wrapping term for prologue/epilogue placement, this pass @@ -128,6 +131,9 @@ class ShrinkWrap : public MachineFunctionPass {    /// are in the same loop.    MachineLoopInfo *MLI; +  // Emit remarks. +  MachineOptimizationRemarkEmitter *ORE = nullptr; +    /// Frequency of the Entry block.    uint64_t EntryFreq; @@ -137,6 +143,9 @@ class ShrinkWrap : public MachineFunctionPass {    /// Current opcode for frame destroy.    unsigned FrameDestroyOpcode; +  /// Stack pointer register, used by llvm.{savestack,restorestack} +  unsigned SP; +    /// Entry block.    const MachineBasicBlock *Entry; @@ -148,7 +157,7 @@ class ShrinkWrap : public MachineFunctionPass {    /// Current MachineFunction.    
MachineFunction *MachineFunc; -  /// \brief Check if \p MI uses or defines a callee-saved register or +  /// Check if \p MI uses or defines a callee-saved register or    /// a frame index. If this is the case, this means \p MI must happen    /// after Save and before Restore.    bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const; @@ -168,14 +177,14 @@ class ShrinkWrap : public MachineFunctionPass {      return CurrentCSRs;    } -  /// \brief Update the Save and Restore points such that \p MBB is in +  /// Update the Save and Restore points such that \p MBB is in    /// the region that is dominated by Save and post-dominated by Restore    /// and Save and Restore still match the safe point definition.    /// Such point may not exist and Save and/or Restore may be null after    /// this call.    void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS); -  /// \brief Initialize the pass for \p MF. +  /// Initialize the pass for \p MF.    void init(MachineFunction &MF) {      RCI.runOnMachineFunction(MF);      MDT = &getAnalysis<MachineDominatorTree>(); @@ -184,10 +193,13 @@ class ShrinkWrap : public MachineFunctionPass {      Restore = nullptr;      MBFI = &getAnalysis<MachineBlockFrequencyInfo>();      MLI = &getAnalysis<MachineLoopInfo>(); +    ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();      EntryFreq = MBFI->getEntryFreq(); -    const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); +    const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); +    const TargetInstrInfo &TII = *Subtarget.getInstrInfo();      FrameSetupOpcode = TII.getCallFrameSetupOpcode();      FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); +    SP = Subtarget.getTargetLowering()->getStackPointerRegisterToSaveRestore();      Entry = &MF.front();      CurrentCSRs.clear();      MachineFunc = &MF; @@ -199,7 +211,7 @@ class ShrinkWrap : public MachineFunctionPass {    /// shrink-wrapping.    bool ArePointsInteresting() const { return Save != Entry && Save && Restore; } -  /// \brief Check if shrink wrapping is enabled for this target and function. +  /// Check if shrink wrapping is enabled for this target and function.    static bool isShrinkWrapEnabled(const MachineFunction &MF);  public: @@ -215,12 +227,18 @@ public:      AU.addRequired<MachineDominatorTree>();      AU.addRequired<MachinePostDominatorTree>();      AU.addRequired<MachineLoopInfo>(); +    AU.addRequired<MachineOptimizationRemarkEmitterPass>();      MachineFunctionPass::getAnalysisUsage(AU);    } +  MachineFunctionProperties getRequiredProperties() const override { +    return MachineFunctionProperties().set( +      MachineFunctionProperties::Property::NoVRegs); +  } +    StringRef getPassName() const override { return "Shrink Wrapping analysis"; } -  /// \brief Perform the shrink-wrapping analysis and update +  /// Perform the shrink-wrapping analysis and update    /// the MachineFrameInfo attached to \p MF with the results.    
bool runOnMachineFunction(MachineFunction &MF) override;  }; @@ -236,28 +254,34 @@ INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)  INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)  INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)  INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)  INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)  bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,                                   RegScavenger *RS) const { -  // Ignore DBG_VALUE and other meta instructions that must not affect codegen. -  if (MI.isMetaInstruction()) -    return false; -    if (MI.getOpcode() == FrameSetupOpcode ||        MI.getOpcode() == FrameDestroyOpcode) { -    DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); +    LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n');      return true;    }    for (const MachineOperand &MO : MI.operands()) {      bool UseOrDefCSR = false;      if (MO.isReg()) { +      // Ignore instructions like DBG_VALUE which don't read/def the register. +      if (!MO.isDef() && !MO.readsReg()) +        continue;        unsigned PhysReg = MO.getReg();        if (!PhysReg)          continue;        assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&               "Unallocated register?!"); -      UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg); +      // The stack pointer is not normally described as a callee-saved register +      // in calling convention definitions, so we need to watch for it +      // separately. An SP mentioned by a call instruction, we can ignore, +      // though, as it's harmless and we do not want to effectively disable tail +      // calls by forcing the restore point to post-dominate them. +      UseOrDefCSR = (!MI.isCall() && PhysReg == SP) || +                    RCI.getLastCalleeSavedAlias(PhysReg);      } else if (MO.isRegMask()) {        // Check if this regmask clobbers any of the CSRs.        for (unsigned Reg : getCurrentCSRs(RS)) { @@ -267,16 +291,17 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,          }        }      } -    if (UseOrDefCSR || MO.isFI()) { -      DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" -                   << MO.isFI() << "): " << MI << '\n'); +    // Skip FrameIndex operands in DBG_VALUE instructions. +    if (UseOrDefCSR || (MO.isFI() && !MI.isDebugValue())) { +      LLVM_DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" +                        << MO.isFI() << "): " << MI << '\n');        return true;      }    }    return false;  } -/// \brief Helper function to find the immediate (post) dominator. +/// Helper function to find the immediate (post) dominator.  
template <typename ListOfBBs, typename DominanceAnalysis>  static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,                                     DominanceAnalysis &Dom) { @@ -300,7 +325,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,      Save = MDT->findNearestCommonDominator(Save, &MBB);    if (!Save) { -    DEBUG(dbgs() << "Found a block that is not reachable from Entry\n"); +    LLVM_DEBUG(dbgs() << "Found a block that is not reachable from Entry\n");      return;    } @@ -334,7 +359,8 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,    }    if (!Restore) { -    DEBUG(dbgs() << "Restore point needs to be spanned on several blocks\n"); +    LLVM_DEBUG( +        dbgs() << "Restore point needs to be spanned on several blocks\n");      return;    } @@ -413,38 +439,16 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,    }  } -/// Check whether the edge (\p SrcBB, \p DestBB) is a backedge according to MLI. -/// I.e., check if it exists a loop that contains SrcBB and where DestBB is the -/// loop header. -static bool isProperBackedge(const MachineLoopInfo &MLI, -                             const MachineBasicBlock *SrcBB, -                             const MachineBasicBlock *DestBB) { -  for (const MachineLoop *Loop = MLI.getLoopFor(SrcBB); Loop; -       Loop = Loop->getParentLoop()) { -    if (Loop->getHeader() == DestBB) -      return true; -  } -  return false; -} +static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE, +                              StringRef RemarkName, StringRef RemarkMessage, +                              const DiagnosticLocation &Loc, +                              const MachineBasicBlock *MBB) { +  ORE->emit([&]() { +    return MachineOptimizationRemarkMissed(DEBUG_TYPE, RemarkName, Loc, MBB) +           << RemarkMessage; +  }); -/// Check if the CFG of \p MF is irreducible. -static bool isIrreducibleCFG(const MachineFunction &MF, -                             const MachineLoopInfo &MLI) { -  const MachineBasicBlock *Entry = &*MF.begin(); -  ReversePostOrderTraversal<const MachineBasicBlock *> RPOT(Entry); -  BitVector VisitedBB(MF.getNumBlockIDs()); -  for (const MachineBasicBlock *MBB : RPOT) { -    VisitedBB.set(MBB->getNumber()); -    for (const MachineBasicBlock *SuccBB : MBB->successors()) { -      if (!VisitedBB.test(SuccBB->getNumber())) -        continue; -      // We already visited SuccBB, thus MBB->SuccBB must be a backedge. -      // Check that the head matches what we have in the loop information. -      // Otherwise, we have an irreducible graph. -      if (!isProperBackedge(MLI, MBB, SuccBB)) -        return true; -    } -  } +  LLVM_DEBUG(dbgs() << RemarkMessage << '\n');    return false;  } @@ -452,19 +456,21 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {    if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))      return false; -  DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');    init(MF); -  if (isIrreducibleCFG(MF, *MLI)) { +  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); +  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {      // If MF is irreducible, a block may be in a loop without      // MachineLoopInfo reporting it. I.e., we may use the      // post-dominance property in loops, which lead to incorrect      // results. 
Moreover, we may miss that the prologue and      // epilogue are not in the same loop, leading to unbalanced      // construction/deconstruction of the stack frame. -    DEBUG(dbgs() << "Irreducible CFGs are not supported yet\n"); -    return false; +    return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG", +                             "Irreducible CFGs are not supported yet.", +                             MF.getFunction().getSubprogram(), &MF.front());    }    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); @@ -472,12 +478,28 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {        TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);    for (MachineBasicBlock &MBB : MF) { -    DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() -                 << '\n'); - -    if (MBB.isEHFuncletEntry()) { -      DEBUG(dbgs() << "EH Funclets are not supported yet.\n"); -      return false; +    LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' +                      << MBB.getName() << '\n'); + +    if (MBB.isEHFuncletEntry()) +      return giveUpWithRemarks(ORE, "UnsupportedEHFunclets", +                               "EH Funclets are not supported yet.", +                               MBB.front().getDebugLoc(), &MBB); + +    if (MBB.isEHPad()) { +      // Push the prologue and epilogue outside of +      // the region that may throw by making sure +      // that all the landing pads are at least at the +      // boundary of the save and restore points. +      // The problem with exceptions is that the throw +      // is not properly modeled and in particular, a +      // basic block can jump out from the middle. +      updateSaveRestorePoints(MBB, RS.get()); +      if (!ArePointsInteresting()) { +        LLVM_DEBUG(dbgs() << "EHPad prevents shrink-wrapping\n"); +        return false; +      } +      continue;      }      for (const MachineInstr &MI : MBB) { @@ -489,7 +511,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {        // If we are at a point where we cannot improve the placement of        // save/restore instructions, just give up.        if (!ArePointsInteresting()) { -        DEBUG(dbgs() << "No Shrink wrap candidate found\n"); +        LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");          return false;        }        // No need to look for other instructions, this basic block @@ -502,20 +524,21 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {      // because it means we did not encounter any frame/CSR related code.      // Otherwise, we would have returned from the previous loop.      
assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!"); -    DEBUG(dbgs() << "Nothing to shrink-wrap\n"); +    LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n");      return false;    } -  DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq -               << '\n'); +  LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq +                    << '\n');    const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();    do { -    DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " -                 << Save->getNumber() << ' ' << Save->getName() << ' ' -                 << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: " -                 << Restore->getNumber() << ' ' << Restore->getName() << ' ' -                 << MBFI->getBlockFreq(Restore).getFrequency() << '\n'); +    LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: " +                      << Save->getNumber() << ' ' << Save->getName() << ' ' +                      << MBFI->getBlockFreq(Save).getFrequency() +                      << "\nRestore: " << Restore->getNumber() << ' ' +                      << Restore->getName() << ' ' +                      << MBFI->getBlockFreq(Restore).getFrequency() << '\n');      bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;      if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) && @@ -523,7 +546,8 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {          ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) &&           TFI->canUseAsEpilogue(*Restore)))        break; -    DEBUG(dbgs() << "New points are too expensive or invalid for the target\n"); +    LLVM_DEBUG( +        dbgs() << "New points are too expensive or invalid for the target\n");      MachineBasicBlock *NewBB;      if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) {        Save = FindIDom<>(*Save, Save->predecessors(), *MDT); @@ -545,9 +569,10 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {      return false;    } -  DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber() -               << ' ' << Save->getName() << "\nRestore: " -               << Restore->getNumber() << ' ' << Restore->getName() << '\n'); +  LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " +                    << Save->getNumber() << ' ' << Save->getName() +                    << "\nRestore: " << Restore->getNumber() << ' ' +                    << Restore->getName() << '\n');    MachineFrameInfo &MFI = MF.getFrameInfo();    MFI.setSavePoint(Save); diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 17a3a84ecda5..5d2669f5ae92 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -16,6 +16,7 @@  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/CodeGen/Passes.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DataLayout.h" @@ -27,7 +28,6 @@  #include "llvm/Pass.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h"  using namespace llvm;  #define DEBUG_TYPE "sjljehprepare" @@ -64,7 +64,6 @@ public:  private:    bool setupEntryBlockAndCallSites(Function &F); -  bool undoSwiftErrorSelect(Function &F);    void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal);    Value 
*setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads);    void lowerIncomingArguments(Function &F); @@ -233,6 +232,13 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) {    assert(AfterAllocaInsPt != F.front().end());    for (auto &AI : F.args()) { +    // Swift error really is a register that we model as memory -- instruction +    // selection will perform mem-to-reg for us and spill/reload appropriately +    // around calls that clobber it. There is no need to spill this +    // value to the stack and doing so would not be allowed. +    if (AI.isSwiftError()) +      continue; +      Type *Ty = AI.getType();      // Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction. @@ -301,8 +307,8 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,        for (InvokeInst *Invoke : Invokes) {          BasicBlock *UnwindBlock = Invoke->getUnwindDest();          if (UnwindBlock != &BB && LiveBBs.count(UnwindBlock)) { -          DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around " -                       << UnwindBlock->getName() << "\n"); +          LLVM_DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around " +                            << UnwindBlock->getName() << "\n");            NeedsSpill = true;            break;          } @@ -462,25 +468,6 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {    return true;  } -bool SjLjEHPrepare::undoSwiftErrorSelect(Function &F) { -  // We have inserted dummy copies 'select true, arg, undef' in the entry block -  // for arguments to simplify this pass. -  // swifterror arguments cannot be used in this way. Undo the select for the -  // swifterror argument. -  for (auto &AI : F.args()) { -    if (AI.isSwiftError()) { -      assert(AI.hasOneUse() && "Must have converted the argument to a select"); -      auto *Select = dyn_cast<SelectInst>(AI.use_begin()->getUser()); -      assert(Select && "There must be single select user"); -      auto *OrigSwiftError = cast<Argument>(Select->getTrueValue()); -      Select->replaceAllUsesWith(OrigSwiftError); -      Select->eraseFromParent(); -      return true; -    } -  } -  return false; -} -  bool SjLjEHPrepare::runOnFunction(Function &F) {    Module &M = *F.getParent();    RegisterFn = M.getOrInsertFunction( @@ -499,7 +486,5 @@ bool SjLjEHPrepare::runOnFunction(Function &F) {    FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);    bool Res = setupEntryBlockAndCallSites(F); -  if (Res) -    Res |= undoSwiftErrorSelect(F);    return Res;  } diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp index ea74c777e1e2..ed74b3e4fa19 100644 --- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp @@ -10,6 +10,7 @@  #include "llvm/CodeGen/SlotIndexes.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h" @@ -73,7 +74,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {      SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block);      for (MachineInstr &MI : MBB) { -      if (MI.isDebugValue()) +      if (MI.isDebugInstr())          continue;        // Insert a store index for the instr. 
@@ -94,9 +95,9 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {    }    // Sort the Idx2MBBMap -  std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); +  llvm::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); -  DEBUG(mf->print(dbgs(), this)); +  LLVM_DEBUG(mf->print(dbgs(), this));    // And we're done!    return false; @@ -146,7 +147,7 @@ void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {  void SlotIndexes::renumberIndexes() {    // Renumber updates the index of every element of the index list. -  DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n"); +  LLVM_DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");    ++NumGlobalRenum;    unsigned index = 0; @@ -173,8 +174,8 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {      // If the next index is bigger, we have caught up.    } while (curItr != indexList.end() && curItr->getIndex() <= index); -  DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() << '-' -               << index << " ***\n"); +  LLVM_DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() +                    << '-' << index << " ***\n");    ++NumLocalRenum;  } @@ -244,7 +245,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,    for (MachineBasicBlock::iterator I = End; I != Begin;) {      --I;      MachineInstr &MI = *I; -    if (!MI.isDebugValue() && mi2iMap.find(&MI) == mi2iMap.end()) +    if (!MI.isDebugInstr() && mi2iMap.find(&MI) == mi2iMap.end())        insertMachineInstrInMaps(MI);    }  } diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp index b989b54d4190..f6786b30b21c 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp @@ -246,7 +246,7 @@ void SpillPlacement::activate(unsigned n) {    }  } -/// \brief Set the threshold for a given entry frequency. +/// Set the threshold for a given entry frequency.  ///  /// Set the threshold relative to \c Entry.  Since the threshold is used as a  /// bound on the open interval (-Threshold;Threshold), 1 is the minimum diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index 1628ee28b8a3..d639f4475301 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -39,6 +39,7 @@  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/DebugLoc.h"  #include "llvm/MC/LaneBitmask.h"  #include "llvm/Support/Allocator.h" @@ -191,7 +192,7 @@ void SplitAnalysis::analyzeUses() {      // I am looking at you, RegisterCoalescer!      DidRepairRange = true;      ++NumRepairs; -    DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n"); +    LLVM_DEBUG(dbgs() << "*** Fixing inconsistent live interval! 
***\n");      const_cast<LiveIntervals&>(LIS)        .shrinkToUses(const_cast<LiveInterval*>(CurLI));      UseBlocks.clear(); @@ -201,10 +202,9 @@ void SplitAnalysis::analyzeUses() {      assert(fixed && "Couldn't fix broken live interval");    } -  DEBUG(dbgs() << "Analyze counted " -               << UseSlots.size() << " instrs in " -               << UseBlocks.size() << " blocks, through " -               << NumThroughBlocks << " blocks.\n"); +  LLVM_DEBUG(dbgs() << "Analyze counted " << UseSlots.size() << " instrs in " +                    << UseBlocks.size() << " blocks, through " +                    << NumThroughBlocks << " blocks.\n");  }  /// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks @@ -685,20 +685,20 @@ unsigned SplitEditor::openIntv() {  void SplitEditor::selectIntv(unsigned Idx) {    assert(Idx != 0 && "Cannot select the complement interval");    assert(Idx < Edit->size() && "Can only select previously opened interval"); -  DEBUG(dbgs() << "    selectIntv " << OpenIdx << " -> " << Idx << '\n'); +  LLVM_DEBUG(dbgs() << "    selectIntv " << OpenIdx << " -> " << Idx << '\n');    OpenIdx = Idx;  }  SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {    assert(OpenIdx && "openIntv not called before enterIntvBefore"); -  DEBUG(dbgs() << "    enterIntvBefore " << Idx); +  LLVM_DEBUG(dbgs() << "    enterIntvBefore " << Idx);    Idx = Idx.getBaseIndex();    VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);    if (!ParentVNI) { -    DEBUG(dbgs() << ": not live\n"); +    LLVM_DEBUG(dbgs() << ": not live\n");      return Idx;    } -  DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); +  LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');    MachineInstr *MI = LIS.getInstructionFromIndex(Idx);    assert(MI && "enterIntvBefore called with invalid index"); @@ -708,14 +708,14 @@ SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {  SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) {    assert(OpenIdx && "openIntv not called before enterIntvAfter"); -  DEBUG(dbgs() << "    enterIntvAfter " << Idx); +  LLVM_DEBUG(dbgs() << "    enterIntvAfter " << Idx);    Idx = Idx.getBoundaryIndex();    VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);    if (!ParentVNI) { -    DEBUG(dbgs() << ": not live\n"); +    LLVM_DEBUG(dbgs() << ": not live\n");      return Idx;    } -  DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); +  LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');    MachineInstr *MI = LIS.getInstructionFromIndex(Idx);    assert(MI && "enterIntvAfter called with invalid index"); @@ -728,18 +728,18 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {    assert(OpenIdx && "openIntv not called before enterIntvAtEnd");    SlotIndex End = LIS.getMBBEndIdx(&MBB);    SlotIndex Last = End.getPrevSlot(); -  DEBUG(dbgs() << "    enterIntvAtEnd " << printMBBReference(MBB) << ", " -               << Last); +  LLVM_DEBUG(dbgs() << "    enterIntvAtEnd " << printMBBReference(MBB) << ", " +                    << Last);    VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last);    if (!ParentVNI) { -    DEBUG(dbgs() << ": not live\n"); +    LLVM_DEBUG(dbgs() << ": not live\n");      return End;    } -  DEBUG(dbgs() << ": valno " << ParentVNI->id); +  LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id);    VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,                                SA.getLastSplitPointIter(&MBB));    RegAssign.insert(VNI->def, End, OpenIdx); -  DEBUG(dump()); +  LLVM_DEBUG(dump());    return 
VNI->def;  } @@ -750,23 +750,23 @@ void SplitEditor::useIntv(const MachineBasicBlock &MBB) {  void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {    assert(OpenIdx && "openIntv not called before useIntv"); -  DEBUG(dbgs() << "    useIntv [" << Start << ';' << End << "):"); +  LLVM_DEBUG(dbgs() << "    useIntv [" << Start << ';' << End << "):");    RegAssign.insert(Start, End, OpenIdx); -  DEBUG(dump()); +  LLVM_DEBUG(dump());  }  SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {    assert(OpenIdx && "openIntv not called before leaveIntvAfter"); -  DEBUG(dbgs() << "    leaveIntvAfter " << Idx); +  LLVM_DEBUG(dbgs() << "    leaveIntvAfter " << Idx);    // The interval must be live beyond the instruction at Idx.    SlotIndex Boundary = Idx.getBoundaryIndex();    VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Boundary);    if (!ParentVNI) { -    DEBUG(dbgs() << ": not live\n"); +    LLVM_DEBUG(dbgs() << ": not live\n");      return Boundary.getNextSlot();    } -  DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); +  LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');    MachineInstr *MI = LIS.getInstructionFromIndex(Boundary);    assert(MI && "No instruction at index"); @@ -788,16 +788,16 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {  SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {    assert(OpenIdx && "openIntv not called before leaveIntvBefore"); -  DEBUG(dbgs() << "    leaveIntvBefore " << Idx); +  LLVM_DEBUG(dbgs() << "    leaveIntvBefore " << Idx);    // The interval must be live into the instruction at Idx.    Idx = Idx.getBaseIndex();    VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);    if (!ParentVNI) { -    DEBUG(dbgs() << ": not live\n"); +    LLVM_DEBUG(dbgs() << ": not live\n");      return Idx.getNextSlot();    } -  DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); +  LLVM_DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');    MachineInstr *MI = LIS.getInstructionFromIndex(Idx);    assert(MI && "No instruction at index"); @@ -808,19 +808,19 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {  SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {    assert(OpenIdx && "openIntv not called before leaveIntvAtTop");    SlotIndex Start = LIS.getMBBStartIdx(&MBB); -  DEBUG(dbgs() << "    leaveIntvAtTop " << printMBBReference(MBB) << ", " -               << Start); +  LLVM_DEBUG(dbgs() << "    leaveIntvAtTop " << printMBBReference(MBB) << ", " +                    << Start);    VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);    if (!ParentVNI) { -    DEBUG(dbgs() << ": not live\n"); +    LLVM_DEBUG(dbgs() << ": not live\n");      return Start;    }    VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,                                MBB.SkipPHIsLabelsAndDebug(MBB.begin()));    RegAssign.insert(Start, VNI->def, OpenIdx); -  DEBUG(dump()); +  LLVM_DEBUG(dump());    return VNI->def;  } @@ -835,9 +835,9 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {    // The complement interval will be extended as needed by LRCalc.extend().    
if (ParentVNI)      forceRecompute(0, *ParentVNI); -  DEBUG(dbgs() << "    overlapIntv [" << Start << ';' << End << "):"); +  LLVM_DEBUG(dbgs() << "    overlapIntv [" << Start << ';' << End << "):");    RegAssign.insert(Start, End, OpenIdx); -  DEBUG(dump()); +  LLVM_DEBUG(dump());  }  //===----------------------------------------------------------------------===// @@ -846,7 +846,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {  void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {    LiveInterval *LI = &LIS.getInterval(Edit->get(0)); -  DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n"); +  LLVM_DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n");    RegAssignMap::iterator AssignI;    AssignI.setMap(RegAssign); @@ -859,9 +859,9 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {      MachineBasicBlock::iterator MBBI(MI);      bool AtBegin;      do AtBegin = MBBI == MBB->begin(); -    while (!AtBegin && (--MBBI)->isDebugValue()); +    while (!AtBegin && (--MBBI)->isDebugInstr()); -    DEBUG(dbgs() << "Removing " << Def << '\t' << *MI); +    LLVM_DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);      LIS.removeVRegDefAt(*LI, Def);      LIS.RemoveMachineInstrFromMaps(*MI);      MI->eraseFromParent(); @@ -876,11 +876,12 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {        continue;      unsigned RegIdx = AssignI.value();      if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) { -      DEBUG(dbgs() << "  cannot find simple kill of RegIdx " << RegIdx << '\n'); +      LLVM_DEBUG(dbgs() << "  cannot find simple kill of RegIdx " << RegIdx +                        << '\n');        forceRecompute(RegIdx, *Edit->getParent().getVNInfoAt(Def));      } else {        SlotIndex Kill = LIS.getInstructionIndex(*MBBI).getRegSlot(); -      DEBUG(dbgs() << "  move kill to " << Kill << '\t' << *MBBI); +      LLVM_DEBUG(dbgs() << "  move kill to " << Kill << '\t' << *MBBI);        AssignI.setStop(Kill);      }    } @@ -907,15 +908,17 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,      // MBB isn't in a loop, it doesn't get any better.  All dominators have a      // higher frequency by definition.      if (!Loop) { -      DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates " -                   << printMBBReference(*MBB) << " at depth 0\n"); +      LLVM_DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) +                        << " dominates " << printMBBReference(*MBB) +                        << " at depth 0\n");        return MBB;      }      // We'll never be able to exit the DefLoop.      
if (Loop == DefLoop) { -      DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates " -                   << printMBBReference(*MBB) << " in the same loop\n"); +      LLVM_DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) +                        << " dominates " << printMBBReference(*MBB) +                        << " in the same loop\n");        return MBB;      } @@ -924,8 +927,9 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,      if (Depth < BestDepth) {        BestMBB = MBB;        BestDepth = Depth; -      DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates " -                   << printMBBReference(*MBB) << " at depth " << Depth << '\n'); +      LLVM_DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) +                        << " dominates " << printMBBReference(*MBB) +                        << " at depth " << Depth << '\n');      }      // Leave loop by going to the immediate dominator of the loop header. @@ -1031,14 +1035,14 @@ void SplitEditor::hoistCopies() {      // instruction in the complement range.  All other copies of ParentVNI      // should be eliminated.      if (VNI->def == ParentVNI->def) { -      DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n'); +      LLVM_DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n');        Dom = DomPair(ValMBB, VNI->def);        continue;      }      // Skip the singly mapped values.  There is nothing to gain from hoisting a      // single back-copy.      if (Values.lookup(std::make_pair(0, ParentVNI->id)).getPointer()) { -      DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n'); +      LLVM_DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n');        continue;      } @@ -1062,10 +1066,11 @@ void SplitEditor::hoistCopies() {        Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB);      } -    DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def -                 << " for parent " << ParentVNI->id << '@' << ParentVNI->def -                 << " hoist to " << printMBBReference(*Dom.first) << ' ' -                 << Dom.second << '\n'); +    LLVM_DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' +                      << VNI->def << " for parent " << ParentVNI->id << '@' +                      << ParentVNI->def << " hoist to " +                      << printMBBReference(*Dom.first) << ' ' << Dom.second +                      << '\n');    }    // Insert the hoisted copies. @@ -1118,7 +1123,7 @@ bool SplitEditor::transferValues() {    bool Skipped = false;    RegAssignMap::const_iterator AssignI = RegAssign.begin();    for (const LiveRange::Segment &S : Edit->getParent()) { -    DEBUG(dbgs() << "  blit " << S << ':'); +    LLVM_DEBUG(dbgs() << "  blit " << S << ':');      VNInfo *ParentVNI = S.valno;      // RegAssign has holes where RegIdx 0 should be used.      SlotIndex Start = S.start; @@ -1140,14 +1145,14 @@ bool SplitEditor::transferValues() {        }        // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. -      DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx -                   << '(' << printReg(Edit->get(RegIdx)) << ')'); +      LLVM_DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx << '(' +                        << printReg(Edit->get(RegIdx)) << ')');        LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));        // Check for a simply defined value that can be blitted directly.        
ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id));        if (VNInfo *VNI = VFP.getPointer()) { -        DEBUG(dbgs() << ':' << VNI->id); +        LLVM_DEBUG(dbgs() << ':' << VNI->id);          LI.addSegment(LiveInterval::Segment(Start, End, VNI));          Start = End;          continue; @@ -1155,7 +1160,7 @@ bool SplitEditor::transferValues() {        // Skip values with forced recomputation.        if (VFP.getInt()) { -        DEBUG(dbgs() << "(recalc)"); +        LLVM_DEBUG(dbgs() << "(recalc)");          Skipped = true;          Start = End;          continue; @@ -1174,7 +1179,7 @@ bool SplitEditor::transferValues() {        if (Start != BlockStart) {          VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End));          assert(VNI && "Missing def for complex mapped value"); -        DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB)); +        LLVM_DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB));          // MBB has its own def. Is it also live-out?          if (BlockEnd <= End)            LRC.setLiveOutValue(&*MBB, VNI); @@ -1187,7 +1192,7 @@ bool SplitEditor::transferValues() {        // Handle the live-in blocks covered by [Start;End).        assert(Start <= BlockStart && "Expected live-in block");        while (BlockStart < End) { -        DEBUG(dbgs() << ">" << printMBBReference(*MBB)); +        LLVM_DEBUG(dbgs() << ">" << printMBBReference(*MBB));          BlockEnd = LIS.getMBBEndIdx(&*MBB);          if (BlockStart == ParentVNI->def) {            // This block has the def of a parent PHI, so it isn't live-in. @@ -1212,7 +1217,7 @@ bool SplitEditor::transferValues() {        }        Start = End;      } while (Start != S.end); -    DEBUG(dbgs() << '\n'); +    LLVM_DEBUG(dbgs() << '\n');    }    LRCalc[0].calculateValues(); @@ -1314,7 +1319,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {      ++RI;      // LiveDebugVariables should have handled all DBG_VALUE instructions.      if (MI->isDebugValue()) { -      DEBUG(dbgs() << "Zapping " << *MI); +      LLVM_DEBUG(dbgs() << "Zapping " << *MI);        MO.setReg(0);        continue;      } @@ -1330,8 +1335,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {      unsigned RegIdx = RegAssign.lookup(Idx);      LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));      MO.setReg(LI.reg); -    DEBUG(dbgs() << "  rewr " << printMBBReference(*MI->getParent()) << '\t' -                 << Idx << ':' << RegIdx << '\t' << *MI); +    LLVM_DEBUG(dbgs() << "  rewr " << printMBBReference(*MI->getParent()) +                      << '\t' << Idx << ':' << RegIdx << '\t' << *MI);      // Extend liveness to Idx if the instruction reads reg.      if (!ExtendRanges || MO.isUndef()) @@ -1416,7 +1421,7 @@ void SplitEditor::deleteRematVictims() {        if (!MI->allDefsAreDead())          continue; -      DEBUG(dbgs() << "All defs dead: " << *MI); +      LLVM_DEBUG(dbgs() << "All defs dead: " << *MI);        Dead.push_back(MI);      }    } @@ -1598,9 +1603,9 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,    SlotIndex Start, Stop;    std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); -  DEBUG(dbgs() << "%bb." << MBBNum << " [" << Start << ';' << Stop << ") intf " -               << LeaveBefore << '-' << EnterAfter << ", live-through " -               << IntvIn << " -> " << IntvOut); +  LLVM_DEBUG(dbgs() << "%bb." 
<< MBBNum << " [" << Start << ';' << Stop +                    << ") intf " << LeaveBefore << '-' << EnterAfter +                    << ", live-through " << IntvIn << " -> " << IntvOut);    assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks"); @@ -1611,7 +1616,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,    MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);    if (!IntvOut) { -    DEBUG(dbgs() << ", spill on entry.\n"); +    LLVM_DEBUG(dbgs() << ", spill on entry.\n");      //      //        <<<<<<<<<    Possible LeaveBefore interference.      //    |-----------|    Live through. @@ -1625,7 +1630,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,    }    if (!IntvIn) { -    DEBUG(dbgs() << ", reload on exit.\n"); +    LLVM_DEBUG(dbgs() << ", reload on exit.\n");      //      //    >>>>>>>          Possible EnterAfter interference.      //    |-----------|    Live through. @@ -1639,7 +1644,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,    }    if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) { -    DEBUG(dbgs() << ", straight through.\n"); +    LLVM_DEBUG(dbgs() << ", straight through.\n");      //      //    |-----------|    Live through.      //    -------------    Straight through, same intv, no interference. @@ -1655,7 +1660,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,    if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter ||                    LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) { -    DEBUG(dbgs() << ", switch avoiding interference.\n"); +    LLVM_DEBUG(dbgs() << ", switch avoiding interference.\n");      //      //    >>>>     <<<<    Non-overlapping EnterAfter/LeaveBefore interference.      //    |-----------|    Live through. @@ -1676,7 +1681,7 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,      return;    } -  DEBUG(dbgs() << ", create local intv for interference.\n"); +  LLVM_DEBUG(dbgs() << ", create local intv for interference.\n");    //    //    >>><><><><<<<    Overlapping EnterAfter/LeaveBefore interference.    //    |-----------|    Live through. @@ -1700,17 +1705,18 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,    SlotIndex Start, Stop;    std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); -  DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop -               << "), uses " << BI.FirstInstr << '-' << BI.LastInstr -               << ", reg-in " << IntvIn << ", leave before " << LeaveBefore -               << (BI.LiveOut ? ", stack-out" : ", killed in block")); +  LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' +                    << Stop << "), uses " << BI.FirstInstr << '-' +                    << BI.LastInstr << ", reg-in " << IntvIn +                    << ", leave before " << LeaveBefore +                    << (BI.LiveOut ? ", stack-out" : ", killed in block"));    assert(IntvIn && "Must have register in");    assert(BI.LiveIn && "Must be live-in");    assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference");    if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastInstr)) { -    DEBUG(dbgs() << " before interference.\n"); +    LLVM_DEBUG(dbgs() << " before interference.\n");      //      //               <<<    Interference after kill.      //     |---o---x   |    Killed in block. 
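For orientation, the enterIntv*/leaveIntv*/useIntv calls traced by the debug output in these hunks always follow the same open/enter/use/leave pattern. A minimal sketch of that sequence, not taken from this patch, assuming the in-tree SplitKit.h context and hypothetical FirstUse/LastUse slot indexes:

    // Sketch only: isolate one block's uses into their own interval.
    void isolateBlockUses(llvm::SplitEditor &SE, llvm::SlotIndex FirstUse,
                          llvm::SlotIndex LastUse) {
      unsigned LocalIntv = SE.openIntv();      // open and select a fresh interval
      llvm::SlotIndex In = SE.enterIntvBefore(FirstUse);  // copy-in before first use
      llvm::SlotIndex Out = SE.leaveIntvAfter(LastUse);   // copy-out after last use
      SE.useIntv(In, Out);                     // map [In;Out) to LocalIntv
      (void)LocalIntv;
    }
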
@@ -1735,13 +1741,13 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,      //            \_____    Stack interval is live-out.      //      if (BI.LastInstr < LSP) { -      DEBUG(dbgs() << ", spill after last use before interference.\n"); +      LLVM_DEBUG(dbgs() << ", spill after last use before interference.\n");        selectIntv(IntvIn);        SlotIndex Idx = leaveIntvAfter(BI.LastInstr);        useIntv(Start, Idx);        assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");      } else { -      DEBUG(dbgs() << ", spill before last split point.\n"); +      LLVM_DEBUG(dbgs() << ", spill before last split point.\n");        selectIntv(IntvIn);        SlotIndex Idx = leaveIntvBefore(LSP);        overlapIntv(Idx, BI.LastInstr); @@ -1756,7 +1762,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,    // different register.    unsigned LocalIntv = openIntv();    (void)LocalIntv; -  DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n"); +  LLVM_DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");    if (!BI.LiveOut || BI.LastInstr < LSP) {      // @@ -1792,10 +1798,11 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,    SlotIndex Start, Stop;    std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); -  DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop -               << "), uses " << BI.FirstInstr << '-' << BI.LastInstr -               << ", reg-out " << IntvOut << ", enter after " << EnterAfter -               << (BI.LiveIn ? ", stack-in" : ", defined in block")); +  LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' +                    << Stop << "), uses " << BI.FirstInstr << '-' +                    << BI.LastInstr << ", reg-out " << IntvOut +                    << ", enter after " << EnterAfter +                    << (BI.LiveIn ? ", stack-in" : ", defined in block"));    SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); @@ -1804,7 +1811,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,    assert((!EnterAfter || EnterAfter < LSP) && "Bad interference");    if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstInstr)) { -    DEBUG(dbgs() << " after interference.\n"); +    LLVM_DEBUG(dbgs() << " after interference.\n");      //      //    >>>>             Interference before def.      //    |   o---o---|    Defined in block. @@ -1816,7 +1823,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,    }    if (!EnterAfter || EnterAfter < BI.FirstInstr.getBaseIndex()) { -    DEBUG(dbgs() << ", reload after interference.\n"); +    LLVM_DEBUG(dbgs() << ", reload after interference.\n");      //      //    >>>>             Interference before def.      //    |---o---o---|    Live-through, stack-in. @@ -1832,7 +1839,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,    // The interference is overlapping somewhere we wanted to use IntvOut. That    // means we need to create a local interval that can be allocated a    // different register. -  DEBUG(dbgs() << ", interference overlaps uses.\n"); +  LLVM_DEBUG(dbgs() << ", interference overlaps uses.\n");    //    //    >>>>>>>          Interference overlapping uses.    //    |---o---o---|    Live-through, stack-in. 
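The DEBUG to LLVM_DEBUG rewrite that dominates these files is mechanical: the macro kept its semantics and moved to an LLVM_-prefixed name, since the bare DEBUG spelling was prone to clashing with macros from other projects. A minimal sketch of the new spelling, with a hypothetical pass name for the DEBUG_TYPE filter:

    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    #define DEBUG_TYPE "my-pass" // hypothetical; enables -debug-only=my-pass

    static void traceSlot(unsigned Slot) {
      // Compiles away entirely in NDEBUG builds; in asserts builds the body
      // runs only when -debug or -debug-only=my-pass is given.
      LLVM_DEBUG(llvm::dbgs() << "visiting slot #" << Slot << '\n');
    }
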
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h index 2dafaf587801..ed664e4f81a3 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -421,7 +421,7 @@ private:    SlotIndex buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,        MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore, -      unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex PrevCopy); +      unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def);  public:    /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index 608845498b48..81a41970f9e2 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -39,9 +39,9 @@  #include "llvm/CodeGen/Passes.h"  #include "llvm/CodeGen/SelectionDAGNodes.h"  #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/CodeGen/StackProtector.h"  #include "llvm/CodeGen/TargetOpcodes.h"  #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DebugInfoMetadata.h"  #include "llvm/IR/Function.h" @@ -422,9 +422,6 @@ class StackColoring : public MachineFunctionPass {    /// SlotIndex analysis object.    SlotIndexes *Indexes; -  /// The stack protector object. -  StackProtector *SP; -    /// The list of lifetime markers found. These markers are to be removed    /// once the coloring is done.    SmallVector<MachineInstr*, 8> Markers; @@ -448,7 +445,7 @@ public:    }    void getAnalysisUsage(AnalysisUsage &AU) const override; -  bool runOnMachineFunction(MachineFunction &MF) override; +  bool runOnMachineFunction(MachineFunction &Func) override;  private:    /// Used in collectMarkers @@ -523,13 +520,11 @@ char &llvm::StackColoringID = StackColoring::ID;  INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE,                        "Merge disjoint stack slots", false, false)  INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(StackProtector)  INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE,                      "Merge disjoint stack slots", false, false)  void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {    AU.addRequired<SlotIndexes>(); -  AU.addRequired<StackProtector>();    MachineFunctionPass::getAnalysisUsage(AU);  } @@ -600,12 +595,12 @@ bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,        isStart = false;        return true;      } -    if (! applyFirstUse(Slot)) { +    if (!applyFirstUse(Slot)) {        isStart = true;        return true;      }    } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) { -    if (! MI.isDebugValue()) { +    if (!MI.isDebugInstr()) {        bool found = false;        for (const MachineOperand &MO : MI.operands()) {          if (!MO.isFI()) @@ -672,13 +667,13 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {          }          const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);          if (Allocation) { -          DEBUG(dbgs() << "Found a lifetime "); -          DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START -                               ? 
"start" -                               : "end")); -          DEBUG(dbgs() << " marker for slot #" << Slot); -          DEBUG(dbgs() << " with allocation: " << Allocation->getName() -                       << "\n"); +          LLVM_DEBUG(dbgs() << "Found a lifetime "); +          LLVM_DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START +                                    ? "start" +                                    : "end")); +          LLVM_DEBUG(dbgs() << " marker for slot #" << Slot); +          LLVM_DEBUG(dbgs() +                     << " with allocation: " << Allocation->getName() << "\n");          }          Markers.push_back(&MI);          MarkersFound += 1; @@ -707,7 +702,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {    for (unsigned slot = 0; slot < NumSlot; ++slot)      if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)        ConservativeSlots.set(slot); -  DEBUG(dumpBV("Conservative slots", ConservativeSlots)); +  LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));    // Step 2: compute begin/end sets for each block @@ -738,14 +733,16 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {            BlockInfo.End.set(Slot);          } else {            for (auto Slot : slots) { -            DEBUG(dbgs() << "Found a use of slot #" << Slot); -            DEBUG(dbgs() << " at " << printMBBReference(*MBB) << " index "); -            DEBUG(Indexes->getInstructionIndex(MI).print(dbgs())); +            LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot); +            LLVM_DEBUG(dbgs() +                       << " at " << printMBBReference(*MBB) << " index "); +            LLVM_DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));              const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);              if (Allocation) { -              DEBUG(dbgs() << " with allocation: "<< Allocation->getName()); +              LLVM_DEBUG(dbgs() +                         << " with allocation: " << Allocation->getName());              } -            DEBUG(dbgs() << "\n"); +            LLVM_DEBUG(dbgs() << "\n");              if (BlockInfo.End.test(Slot)) {                BlockInfo.End.reset(Slot);              } @@ -779,8 +776,11 @@ void StackColoring::calculateLocalLiveness() {        for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),             PE = BB->pred_end(); PI != PE; ++PI) {          LivenessMap::const_iterator I = BlockLiveness.find(*PI); -        assert(I != BlockLiveness.end() && "Predecessor not found"); -        LocalLiveIn |= I->second.LiveOut; +        // PR37130: transformations prior to stack coloring can +        // sometimes leave behind statically unreachable blocks; these +        // can be safely skipped here. 
+        if (I != BlockLiveness.end()) +          LocalLiveIn |= I->second.LiveOut;        }        // Compute LiveOut by subtracting out lifetimes that end in this @@ -880,7 +880,7 @@ bool StackColoring::removeAllMarkers() {    }    Markers.clear(); -  DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n"); +  LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");    return Count;  } @@ -894,8 +894,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {      if (!VI.Var)        continue;      if (SlotRemap.count(VI.Slot)) { -      DEBUG(dbgs() << "Remapping debug info for [" -                   << cast<DILocalVariable>(VI.Var)->getName() << "].\n"); +      LLVM_DEBUG(dbgs() << "Remapping debug info for [" +                        << cast<DILocalVariable>(VI.Var)->getName() << "].\n");        VI.Slot = SlotRemap[VI.Slot];        FixedDbg++;      } @@ -930,9 +930,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {      MergedAllocas.insert(From);      MergedAllocas.insert(To); -    // Allow the stack protector to adjust its value map to account for the -    // upcoming replacement. -    SP->adjustForColoring(From, To); +    // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf +    // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure +    // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray. +    MachineFrameInfo::SSPLayoutKind FromKind +        = MFI->getObjectSSPLayout(SI.first); +    MachineFrameInfo::SSPLayoutKind ToKind = MFI->getObjectSSPLayout(SI.second); +    if (FromKind != MachineFrameInfo::SSPLK_None && +        (ToKind == MachineFrameInfo::SSPLK_None || +         (ToKind != MachineFrameInfo::SSPLK_LargeArray && +          FromKind != MachineFrameInfo::SSPLK_AddrOf))) +      MFI->setObjectSSPLayout(SI.second, FromKind);      // The new alloca might not be valid in a llvm.dbg.declare for this      // variable, so undef out the use to make the verifier happy. @@ -993,13 +1001,13 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {          // the calculated range then it means that the alloca usage moved          // outside of the lifetime markers, or that the user has a bug.          // NOTE: Alloca address calculations which happen outside the lifetime -        // zone are are okay, despite the fact that we don't have a good way +        // zone are okay, despite the fact that we don't have a good way          // for validating all of the usages of the calculation.  #ifndef NDEBUG          bool TouchesMemory = I.mayLoad() || I.mayStore();          // If we *don't* protect the user from escaped allocas, don't bother          // validating the instructions. 
-        if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) { +        if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {            SlotIndex Index = Indexes->getInstructionIndex(I);            const LiveInterval *Interval = &*Intervals[FromSlot];            assert(Interval->find(Index) != Interval->end() && @@ -1064,16 +1072,16 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {              SlotRemap.count(H.CatchObj.FrameIndex))            H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex]; -  DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n"); -  DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n"); -  DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n"); +  LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n"); +  LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n"); +  LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");  }  void StackColoring::removeInvalidSlotRanges() {    for (MachineBasicBlock &BB : *MF)      for (MachineInstr &I : BB) {        if (I.getOpcode() == TargetOpcode::LIFETIME_START || -          I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugValue()) +          I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr())          continue;        // Some intervals are suspicious! In some cases we find address @@ -1104,7 +1112,7 @@ void StackColoring::removeInvalidSlotRanges() {          SlotIndex Index = Indexes->getInstructionIndex(I);          if (Interval->find(Index) == Interval->end()) {            Interval->clear(); -          DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n"); +          LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n");            EscapedAllocas++;          }        } @@ -1128,12 +1136,11 @@ void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,  }  bool StackColoring::runOnMachineFunction(MachineFunction &Func) { -  DEBUG(dbgs() << "********** Stack Coloring **********\n" -               << "********** Function: " << Func.getName() << '\n'); +  LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n" +                    << "********** Function: " << Func.getName() << '\n');    MF = &Func;    MFI = &MF->getFrameInfo();    Indexes = &getAnalysis<SlotIndexes>(); -  SP = &getAnalysis<StackProtector>();    BlockLiveness.clear();    BasicBlocks.clear();    BasicBlockNumbering.clear(); @@ -1156,21 +1163,23 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {    unsigned NumMarkers = collectMarkers(NumSlots);    unsigned TotalSize = 0; -  DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n"); -  DEBUG(dbgs()<<"Slot structure:\n"); +  LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots +                    << " slots\n"); +  LLVM_DEBUG(dbgs() << "Slot structure:\n");    for (int i=0; i < MFI->getObjectIndexEnd(); ++i) { -    DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n"); +    LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i) +                      << " bytes.\n");      TotalSize += MFI->getObjectSize(i);    } -  DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n"); +  LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");    // Don't continue because there are not enough lifetime markers, or the    // stack is too small, or we are told not to optimize the slots.    
if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||        skipFunction(Func.getFunction())) { -    DEBUG(dbgs()<<"Will not try to merge slots.\n"); +    LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");      return removeAllMarkers();    } @@ -1183,12 +1192,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {    // Calculate the liveness of each block.    calculateLocalLiveness(); -  DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n"); -  DEBUG(dump()); +  LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n"); +  LLVM_DEBUG(dump());    // Propagate the liveness information.    calculateLiveIntervals(NumSlots); -  DEBUG(dumpIntervals()); +  LLVM_DEBUG(dumpIntervals());    // Search for allocas which are used outside of the declared lifetime    // markers. @@ -1224,7 +1233,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {    });    for (auto &s : LiveStarts) -    std::sort(s.begin(), s.end()); +    llvm::sort(s.begin(), s.end());    bool Changed = true;    while (Changed) { @@ -1259,8 +1268,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {            SlotRemap[SecondSlot] = FirstSlot;            SortedSlots[J] = -1; -          DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<< -                SecondSlot<<" together.\n"); +          LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #" +                            << SecondSlot << " together.\n");            unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),                                             MFI->getObjectAlignment(SecondSlot)); @@ -1280,8 +1289,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {    // Record statistics.    StackSpaceSaved += ReducedSize;    StackSlotMerged += RemovedSlots; -  DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<< -        ReducedSize<<" bytes\n"); +  LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved " +                    << ReducedSize << " bytes\n");    // Scan the entire function and update all machine operands that use frame    // indices to use the remapped frame index. diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index cc9af92c395f..00cf8070be5e 100644 --- a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -39,7 +39,7 @@ STATISTIC(NumBBsHaveNoStackmap,   "Number of basic blocks with no stackmap");  STATISTIC(NumStackMaps,           "Number of StackMaps visited");  namespace { -/// \brief This pass calculates the liveness information for each basic block in +/// This pass calculates the liveness information for each basic block in  /// a function and attaches the register live-out information to a patchpoint  /// intrinsic if present.  /// @@ -54,10 +54,10 @@ class StackMapLiveness : public MachineFunctionPass {  public:    static char ID; -  /// \brief Default construct and initialize the pass. +  /// Default construct and initialize the pass.    StackMapLiveness(); -  /// \brief Tell the pass manager which passes we depend on and what +  /// Tell the pass manager which passes we depend on and what    /// information we preserve.    void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -66,17 +66,17 @@ public:          MachineFunctionProperties::Property::NoVRegs);    } -  /// \brief Calculate the liveness information for the given machine function. 
+  /// Calculate the liveness information for the given machine function.    bool runOnMachineFunction(MachineFunction &MF) override;  private: -  /// \brief Performs the actual liveness calculation for the function. +  /// Performs the actual liveness calculation for the function.    bool calculateLiveness(MachineFunction &MF); -  /// \brief Add the current register live set to the instruction. +  /// Add the current register live set to the instruction.    void addLiveOutSetToMI(MachineFunction &MF, MachineInstr &MI); -  /// \brief Create a register mask and initialize it with the registers from +  /// Create a register mask and initialize it with the registers from    /// the register live set.    uint32_t *createRegisterMask(MachineFunction &MF) const;  }; @@ -106,8 +106,8 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {    if (!EnablePatchPointLiveness)      return false; -  DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName() -               << " **********\n"); +  LLVM_DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " +                    << MF.getName() << " **********\n");    TRI = MF.getSubtarget().getRegisterInfo();    ++NumStackMapFuncVisited; @@ -124,7 +124,7 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {    bool HasChanged = false;    // For all basic blocks in the function.    for (auto &MBB : MF) { -    DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n"); +    LLVM_DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n");      LiveRegs.init(*TRI);      // FIXME: This should probably be addLiveOuts().      LiveRegs.addLiveOutsNoPristines(MBB); @@ -138,7 +138,7 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {          HasStackMap = true;          ++NumStackMaps;        } -      DEBUG(dbgs() << "   " << LiveRegs << "   " << *I); +      LLVM_DEBUG(dbgs() << "   " << LiveRegs << "   " << *I);        LiveRegs.stepBackward(*I);      }      ++NumBBsVisited; @@ -160,7 +160,7 @@ void StackMapLiveness::addLiveOutSetToMI(MachineFunction &MF,  /// register live set.  uint32_t *StackMapLiveness::createRegisterMask(MachineFunction &MF) const {    // The mask is owned and cleaned up by the Machine Function. -  uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs()); +  uint32_t *Mask = MF.allocateRegMask();    for (auto Reg : LiveRegs)      Mask[Reg / 32] |= 1U << (Reg % 32); diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp index e66a25bec911..19a191c01db9 100644 --- a/contrib/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -268,11 +268,11 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {    // in the list. Merge entries that refer to the same dwarf register and use    // the maximum size that needs to be spilled. -  std::sort(LiveOuts.begin(), LiveOuts.end(), -            [](const LiveOutReg &LHS, const LiveOutReg &RHS) { -              // Only sort by the dwarf register number. -              return LHS.DwarfRegNum < RHS.DwarfRegNum; -            }); +  llvm::sort(LiveOuts.begin(), LiveOuts.end(), +             [](const LiveOutReg &LHS, const LiveOutReg &RHS) { +               // Only sort by the dwarf register number. 
+               return LHS.DwarfRegNum < RHS.DwarfRegNum; +             });    for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) {      for (auto II = std::next(I); II != E; ++II) { @@ -420,13 +420,13 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) {    OS.EmitIntValue(0, 2);               // Reserved.    // Num functions. -  DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n'); +  LLVM_DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n');    OS.EmitIntValue(FnInfos.size(), 4);    // Num constants. -  DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n'); +  LLVM_DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n');    OS.EmitIntValue(ConstPool.size(), 4);    // Num callsites. -  DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n'); +  LLVM_DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n');    OS.EmitIntValue(CSInfos.size(), 4);  } @@ -439,11 +439,11 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) {  /// }  void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) {    // Function Frame records. -  DEBUG(dbgs() << WSMP << "functions:\n"); +  LLVM_DEBUG(dbgs() << WSMP << "functions:\n");    for (auto const &FR : FnInfos) { -    DEBUG(dbgs() << WSMP << "function addr: " << FR.first -                 << " frame size: " << FR.second.StackSize -                 << " callsite count: " << FR.second.RecordCount << '\n'); +    LLVM_DEBUG(dbgs() << WSMP << "function addr: " << FR.first +                      << " frame size: " << FR.second.StackSize +                      << " callsite count: " << FR.second.RecordCount << '\n');      OS.EmitSymbolValue(FR.first, 8);      OS.EmitIntValue(FR.second.StackSize, 8);      OS.EmitIntValue(FR.second.RecordCount, 8); @@ -455,9 +455,9 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) {  /// int64  : Constants[NumConstants]  void StackMaps::emitConstantPoolEntries(MCStreamer &OS) {    // Constant pool entries. -  DEBUG(dbgs() << WSMP << "constants:\n"); +  LLVM_DEBUG(dbgs() << WSMP << "constants:\n");    for (const auto &ConstEntry : ConstPool) { -    DEBUG(dbgs() << WSMP << ConstEntry.second << '\n'); +    LLVM_DEBUG(dbgs() << WSMP << ConstEntry.second << '\n');      OS.EmitIntValue(ConstEntry.second, 8);    }  } @@ -492,7 +492,7 @@ void StackMaps::emitConstantPoolEntries(MCStreamer &OS) {  ///   0x4, Constant, Offset              (small constant)  ///   0x5, ConstIndex, Constants[Offset] (large constant)  void StackMaps::emitCallsiteEntries(MCStreamer &OS) { -  DEBUG(print(dbgs())); +  LLVM_DEBUG(print(dbgs()));    // Callsite entries.    for (const auto &CSI : CSInfos) {      const LocationVec &CSLocs = CSI.Locations; @@ -569,7 +569,7 @@ void StackMaps::serializeToStackMapSection() {    OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps")));    // Serialize data. 
-  DEBUG(dbgs() << "********** Stack Map Output **********\n"); +  LLVM_DEBUG(dbgs() << "********** Stack Map Output **********\n");    emitStackmapHeader(OS);    emitFunctionFrameRecords(OS);    emitConstantPoolEntries(OS); diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index 62cef95a4af2..cb12c7ce6e82 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -36,6 +36,7 @@  #include "llvm/IR/IRBuilder.h"  #include "llvm/IR/Instruction.h"  #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h"  #include "llvm/IR/Intrinsics.h"  #include "llvm/IR/MDBuilder.h"  #include "llvm/IR/Module.h" @@ -69,32 +70,6 @@ INITIALIZE_PASS_END(StackProtector, DEBUG_TYPE,  FunctionPass *llvm::createStackProtectorPass() { return new StackProtector(); } -StackProtector::SSPLayoutKind -StackProtector::getSSPLayout(const AllocaInst *AI) const { -  return AI ? Layout.lookup(AI) : SSPLK_None; -} - -void StackProtector::adjustForColoring(const AllocaInst *From, -                                       const AllocaInst *To) { -  // When coloring replaces one alloca with another, transfer the SSPLayoutKind -  // tag from the remapped to the target alloca. The remapped alloca should -  // have a size smaller than or equal to the replacement alloca. -  SSPLayoutMap::iterator I = Layout.find(From); -  if (I != Layout.end()) { -    SSPLayoutKind Kind = I->second; -    Layout.erase(I); - -    // Transfer the tag, but make sure that SSPLK_AddrOf does not overwrite -    // SSPLK_SmallArray or SSPLK_LargeArray, and make sure that -    // SSPLK_SmallArray does not overwrite SSPLK_LargeArray. -    I = Layout.find(To); -    if (I == Layout.end()) -      Layout.insert(std::make_pair(To, Kind)); -    else if (I->second != SSPLK_LargeArray && Kind != SSPLK_AddrOf) -      I->second = Kind; -  } -} -  void StackProtector::getAnalysisUsage(AnalysisUsage &AU) const {    AU.addRequired<TargetPassConfig>();    AU.addPreserved<DominatorTreeWrapperPass>(); @@ -182,6 +157,14 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,    return NeedsProtector;  } +static bool isLifetimeInst(const Instruction *I) { +  if (const auto Intrinsic = dyn_cast<IntrinsicInst>(I)) { +    const auto Id = Intrinsic->getIntrinsicID(); +    return Id == Intrinsic::lifetime_start || Id == Intrinsic::lifetime_end; +  } +  return false; +} +  bool StackProtector::HasAddressTaken(const Instruction *AI) {    for (const User *U : AI->users()) {      if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { @@ -190,8 +173,10 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {      } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) {        if (AI == SI->getOperand(0))          return true; -    } else if (isa<CallInst>(U)) { -      return true; +    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { +      // Ignore intrinsics that are not calls. TODO: Use isLoweredToCall(). +      if (!isa<DbgInfoIntrinsic>(CI) && !isLifetimeInst(CI)) +        return true;      } else if (isa<InvokeInst>(U)) {        return true;      } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) { @@ -214,7 +199,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) {    return false;  } -/// \brief Check whether or not this function needs a stack protector based +/// Check whether or not this function needs a stack protector based  /// upon the stack protector level.  
///  /// We use two heuristics: a standard (ssp) and strong (sspstrong). @@ -278,18 +263,21 @@ bool StackProtector::RequiresStackProtector() {              if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {                // A call to alloca with size >= SSPBufferSize requires                // stack protectors. -              Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); +              Layout.insert(std::make_pair(AI, +                                           MachineFrameInfo::SSPLK_LargeArray));                ORE.emit(RemarkBuilder);                NeedsProtector = true;              } else if (Strong) {                // Require protectors for all alloca calls in strong mode. -              Layout.insert(std::make_pair(AI, SSPLK_SmallArray)); +              Layout.insert(std::make_pair(AI, +                                           MachineFrameInfo::SSPLK_SmallArray));                ORE.emit(RemarkBuilder);                NeedsProtector = true;              }            } else {              // A call to alloca with a variable size requires protectors. -            Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); +            Layout.insert(std::make_pair(AI, +                                         MachineFrameInfo::SSPLK_LargeArray));              ORE.emit(RemarkBuilder);              NeedsProtector = true;            } @@ -298,8 +286,9 @@ bool StackProtector::RequiresStackProtector() {          bool IsLarge = false;          if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) { -          Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray -                                                   : SSPLK_SmallArray)); +          Layout.insert(std::make_pair(AI, IsLarge +                                       ? MachineFrameInfo::SSPLK_LargeArray +                                       : MachineFrameInfo::SSPLK_SmallArray));            ORE.emit([&]() {              return OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)                     << "Stack protection applied to function " @@ -313,7 +302,7 @@ bool StackProtector::RequiresStackProtector() {          if (Strong && HasAddressTaken(AI)) {            ++NumAddrTaken; -          Layout.insert(std::make_pair(AI, SSPLK_AddrOf)); +          Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));            ORE.emit([&]() {              return OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken",                                        &I) @@ -523,3 +512,23 @@ BasicBlock *StackProtector::CreateFailBB() {  bool StackProtector::shouldEmitSDCheck(const BasicBlock &BB) const {    return HasPrologue && !HasIRCheck && dyn_cast<ReturnInst>(BB.getTerminator());  } + +void StackProtector::copyToMachineFrameInfo(MachineFrameInfo &MFI) const { +  if (Layout.empty()) +    return; + +  for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { +    if (MFI.isDeadObjectIndex(I)) +      continue; + +    const AllocaInst *AI = MFI.getObjectAllocation(I); +    if (!AI) +      continue; + +    SSPLayoutMap::const_iterator LI = Layout.find(AI); +    if (LI == Layout.end()) +      continue; + +    MFI.setObjectSSPLayout(I, LI->second); +  } +} diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index 8fc7a4a32842..eb15b15a24a6 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -82,14 +82,14 @@ namespace {      // AllColors - If index is set, it's a spill slot, i.e. 
color.      // FIXME: This assumes PEI locate spill slot with smaller indices      // closest to stack pointer / frame pointer. Therefore, smaller -    // index == better color. -    BitVector AllColors; +    // index == better color. This is per stack ID. +    SmallVector<BitVector, 2> AllColors; -    // NextColor - Next "color" that's not yet used. -    int NextColor = -1; +    // NextColor - Next "color" that's not yet used. This is per stack ID. +    SmallVector<int, 2> NextColors = { -1 }; -    // UsedColors - "Colors" that have been assigned. -    BitVector UsedColors; +    // UsedColors - "Colors" that have been assigned. This is per stack ID +    SmallVector<BitVector, 2> UsedColors;      // Assignments - Color to intervals mapping.      SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments; @@ -196,10 +196,15 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {  /// to a sorted (by weight) list.  void StackSlotColoring::InitializeSlots() {    int LastFI = MFI->getObjectIndexEnd(); + +  // There is always at least one stack ID. +  AllColors.resize(1); +  UsedColors.resize(1); +    OrigAlignments.resize(LastFI);    OrigSizes.resize(LastFI); -  AllColors.resize(LastFI); -  UsedColors.resize(LastFI); +  AllColors[0].resize(LastFI); +  UsedColors[0].resize(LastFI);    Assignments.resize(LastFI);    using Pair = std::iterator_traits<LiveStacks::iterator>::value_type; @@ -209,29 +214,42 @@ void StackSlotColoring::InitializeSlots() {    Intervals.reserve(LS->getNumIntervals());    for (auto &I : *LS)      Intervals.push_back(&I); -  std::sort(Intervals.begin(), Intervals.end(), -            [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; }); +  llvm::sort(Intervals.begin(), Intervals.end(), +             [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });    // Gather all spill slots into a list. -  DEBUG(dbgs() << "Spill slot intervals:\n"); +  LLVM_DEBUG(dbgs() << "Spill slot intervals:\n");    for (auto *I : Intervals) {      LiveInterval &li = I->second; -    DEBUG(li.dump()); +    LLVM_DEBUG(li.dump());      int FI = TargetRegisterInfo::stackSlot2Index(li.reg);      if (MFI->isDeadObjectIndex(FI))        continue; +      SSIntervals.push_back(&li);      OrigAlignments[FI] = MFI->getObjectAlignment(FI);      OrigSizes[FI]      = MFI->getObjectSize(FI); -    AllColors.set(FI); + +    auto StackID = MFI->getStackID(FI); +    if (StackID != 0) { +      AllColors.resize(StackID + 1); +      UsedColors.resize(StackID + 1); +      AllColors[StackID].resize(LastFI); +      UsedColors[StackID].resize(LastFI); +    } + +    AllColors[StackID].set(FI);    } -  DEBUG(dbgs() << '\n'); +  LLVM_DEBUG(dbgs() << '\n');    // Sort them by weight.    std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); +  NextColors.resize(AllColors.size()); +    // Get first "color". -  NextColor = AllColors.find_first(); +  for (unsigned I = 0, E = AllColors.size(); I != E; ++I) +    NextColors[I] = AllColors[I].find_first();  }  /// OverlapWithAssignments - Return true if LiveInterval overlaps with any @@ -252,37 +270,41 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {    int Color = -1;    bool Share = false;    int FI = TargetRegisterInfo::stackSlot2Index(li->reg); +  uint8_t StackID = MFI->getStackID(FI);    if (!DisableSharing) { +      // Check if it's possible to reuse any of the used colors. 
-    Color = UsedColors.find_first(); +    Color = UsedColors[StackID].find_first();      while (Color != -1) {        if (!OverlapWithAssignments(li, Color)) {          Share = true;          ++NumEliminated;          break;        } -      Color = UsedColors.find_next(Color); +      Color = UsedColors[StackID].find_next(Color);      }    }    if (Color != -1 && MFI->getStackID(Color) != MFI->getStackID(FI)) { -    DEBUG(dbgs() << "cannot share FIs with different stack IDs\n"); +    LLVM_DEBUG(dbgs() << "cannot share FIs with different stack IDs\n");      Share = false;    }    // Assign it to the first available color (assumed to be the best) if it's    // not possible to share a used color with other objects.    if (!Share) { -    assert(NextColor != -1 && "No more spill slots?"); -    Color = NextColor; -    UsedColors.set(Color); -    NextColor = AllColors.find_next(NextColor); +    assert(NextColors[StackID] != -1 && "No more spill slots?"); +    Color = NextColors[StackID]; +    UsedColors[StackID].set(Color); +    NextColors[StackID] = AllColors[StackID].find_next(NextColors[StackID]);    } +  assert(MFI->getStackID(Color) == MFI->getStackID(FI)); +    // Record the assignment.    Assignments[Color].push_back(li); -  DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); +  LLVM_DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");    // Change size and alignment of the allocated slot. If there are multiple    // objects sharing the same slot, then make sure the size and alignment @@ -305,7 +327,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {    SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);    BitVector UsedColors(NumObjs); -  DEBUG(dbgs() << "Color spill slot intervals:\n"); +  LLVM_DEBUG(dbgs() << "Color spill slot intervals:\n");    bool Changed = false;    for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {      LiveInterval *li = SSIntervals[i]; @@ -319,7 +341,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {      Changed |= (SS != NewSS);    } -  DEBUG(dbgs() << "\nSpill slots after coloring:\n"); +  LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n");    for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {      LiveInterval *li = SSIntervals[i];      int SS = TargetRegisterInfo::stackSlot2Index(li->reg); @@ -330,8 +352,8 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {  #ifndef NDEBUG    for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) -    DEBUG(SSIntervals[i]->dump()); -  DEBUG(dbgs() << '\n'); +    LLVM_DEBUG(SSIntervals[i]->dump()); +  LLVM_DEBUG(dbgs() << '\n');  #endif    if (!Changed) @@ -357,10 +379,13 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {    }    // Delete unused stack slots. 
-  while (NextColor != -1) { -    DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n"); -    MFI->RemoveStackObject(NextColor); -    NextColor = AllColors.find_next(NextColor); +  for (int StackID = 0, E = AllColors.size(); StackID != E; ++StackID) { +    int NextColor = NextColors[StackID]; +    while (NextColor != -1) { +      LLVM_DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n"); +      MFI->RemoveStackObject(NextColor); +      NextColor = AllColors[StackID].find_next(NextColor); +    }    }    return true; @@ -382,6 +407,8 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI,      int NewFI = SlotMapping[OldFI];      if (NewFI == -1 || NewFI == OldFI)        continue; + +    assert(MFI->getStackID(OldFI) == MFI->getStackID(NewFI));      MO.setIndex(NewFI);    } @@ -418,17 +445,21 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {      unsigned LoadReg = 0;      unsigned StoreReg = 0; -    if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS))) +    unsigned LoadSize = 0; +    unsigned StoreSize = 0; +    if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS, LoadSize)))        continue;      // Skip the ...pseudo debugging... instructions between a load and store. -    while ((NextMI != E) && NextMI->isDebugValue()) { +    while ((NextMI != E) && NextMI->isDebugInstr()) {        ++NextMI;        ++I;      }      if (NextMI == E) continue; -    if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS))) +    if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS, StoreSize))) +      continue; +    if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1 || +        LoadSize != StoreSize)        continue; -    if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;      ++NumDead;      changed = true; @@ -450,10 +481,13 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {  }  bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { -  DEBUG({ -      dbgs() << "********** Stack Slot Coloring **********\n" -             << "********** Function: " << MF.getName() << '\n'; -    }); +  LLVM_DEBUG({ +    dbgs() << "********** Stack Slot Coloring **********\n" +           << "********** Function: " << MF.getName() << '\n'; +  }); + +  if (skipFunction(MF.getFunction())) +    return false;    MFI = &MF.getFrameInfo();    TII = MF.getSubtarget().getInstrInfo(); @@ -479,7 +513,9 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {    InitializeSlots();    Changed = ColorSlots(MF); -  NextColor = -1; +  for (int &Next : NextColors) +    Next = -1; +    SSIntervals.clear();    for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)      SSRefs[i].clear(); diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index df1eebf43b2b..25cd7802264e 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -7,8 +7,9 @@  //  //===----------------------------------------------------------------------===//  // -// This pass duplicates basic blocks ending in unconditional branches into -// the tails of their predecessors, using the TailDuplicator utility class. +/// \file This pass duplicates basic blocks ending in unconditional branches +/// into the tails of their predecessors, using the TailDuplicator utility +/// class.  
//  //===----------------------------------------------------------------------===// @@ -26,38 +27,55 @@ using namespace llvm;  namespace { -/// Perform tail duplication. Delegates to TailDuplicator -class TailDuplicatePass : public MachineFunctionPass { +class TailDuplicateBase : public MachineFunctionPass {    TailDuplicator Duplicator; - +  bool PreRegAlloc;  public: -  static char ID; - -  explicit TailDuplicatePass() : MachineFunctionPass(ID) {} +  TailDuplicateBase(char &PassID, bool PreRegAlloc) +    : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}    bool runOnMachineFunction(MachineFunction &MF) override; -  void getAnalysisUsage(AnalysisUsage &AU) const override; +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.addRequired<MachineBranchProbabilityInfo>(); +    MachineFunctionPass::getAnalysisUsage(AU); +  } +}; + +class TailDuplicate : public TailDuplicateBase { +public: +  static char ID; +  TailDuplicate() : TailDuplicateBase(ID, false) { +    initializeTailDuplicatePass(*PassRegistry::getPassRegistry()); +  } +}; + +class EarlyTailDuplicate : public TailDuplicateBase { +public: +  static char ID; +  EarlyTailDuplicate() : TailDuplicateBase(ID, true) { +    initializeEarlyTailDuplicatePass(*PassRegistry::getPassRegistry()); +  }  };  } // end anonymous namespace -char TailDuplicatePass::ID = 0; +char TailDuplicate::ID; +char EarlyTailDuplicate::ID; -char &llvm::TailDuplicateID = TailDuplicatePass::ID; +char &llvm::TailDuplicateID = TailDuplicate::ID; +char &llvm::EarlyTailDuplicateID = EarlyTailDuplicate::ID; -INITIALIZE_PASS(TailDuplicatePass, DEBUG_TYPE, "Tail Duplication", false, false) +INITIALIZE_PASS(TailDuplicate, DEBUG_TYPE, "Tail Duplication", false, false) +INITIALIZE_PASS(EarlyTailDuplicate, "early-tailduplication", +                "Early Tail Duplication", false, false) -bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { +bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {    if (skipFunction(MF.getFunction()))      return false;    auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - -  // TODO: Querying isSSA() to determine pre-/post-regalloc is fragile, better -  // split this into two passes instead. 
-  bool PreRegAlloc = MF.getRegInfo().isSSA(); -  Duplicator.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ false); +  Duplicator.initMF(MF, PreRegAlloc, MBPI, /*LayoutMode=*/false);    bool MadeChange = false;    while (Duplicator.tailDuplicateBlocks()) @@ -65,8 +83,3 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {    return MadeChange;  } - -void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const { -  AU.addRequired<MachineBranchProbabilityInfo>(); -  MachineFunctionPass::getAnalysisUsage(AU); -} diff --git a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp index f51c884839b3..b118c176a897 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp @@ -37,6 +37,7 @@  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h"  #include <algorithm>  #include <cassert>  #include <iterator> @@ -261,7 +262,7 @@ bool TailDuplicator::tailDuplicateBlocks() {    bool MadeChange = false;    if (PreRegAlloc && TailDupVerify) { -    DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); +    LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n");      VerifyPHIs(*MF, true);    } @@ -371,6 +372,13 @@ void TailDuplicator::duplicateInstruction(      MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,      DenseMap<unsigned, RegSubRegPair> &LocalVRMap,      const DenseSet<unsigned> &UsedByPhi) { +  // Allow duplication of CFI instructions. +  if (MI->isCFIInstruction()) { +    BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()), +      TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex( +      MI->getOperand(0).getCFIIndex()); +    return; +  }   MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI);    if (PreRegAlloc) {      for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) { @@ -585,7 +593,13 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,    unsigned InstrCount = 0;    for (MachineInstr &MI : TailBB) {      // Non-duplicable things shouldn't be tail-duplicated. -    if (MI.isNotDuplicable()) +    // CFI instructions are marked as non-duplicable, because Darwin compact +    // unwind info emission can't handle multiple prologue setups. In case of +    // DWARF, allow them to be duplicated, so that their existence doesn't prevent +    // tail duplication of basic blocks that would otherwise be duplicated. 
+    if (MI.isNotDuplicable() && +        (TailBB.getParent()->getTarget().getTargetTriple().isOSDarwin() || +        !MI.isCFIInstruction()))        return false;      // Convergent instructions can be duplicated only if doing so doesn't add @@ -605,7 +619,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,      if (PreRegAlloc && MI.isCall())        return false; -    if (!MI.isPHI() && !MI.isDebugValue()) +    if (!MI.isPHI() && !MI.isMetaInstruction())        InstrCount += 1;      if (InstrCount > MaxDuplicateCount) @@ -704,8 +718,8 @@ bool TailDuplicator::duplicateSimpleBB(        continue;      Changed = true; -    DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB -                 << "From simple Succ: " << *TailBB); +    LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB +                      << "From simple Succ: " << *TailBB);      MachineBasicBlock *NewTarget = *TailBB->succ_begin();      MachineBasicBlock *NextBB = PredBB->getNextNode(); @@ -785,8 +799,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,                                     MachineBasicBlock *ForcedLayoutPred,                                     SmallVectorImpl<MachineBasicBlock *> &TDBBs,                                     SmallVectorImpl<MachineInstr *> &Copies) { -  DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB) -               << '\n'); +  LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB) +                    << '\n');    DenseSet<unsigned> UsedByPhi;    getRegsUsedByPHIs(*TailBB, &UsedByPhi); @@ -816,8 +830,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,      if (IsLayoutSuccessor)        continue; -    DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB -                 << "From Succ: " << *TailBB); +    LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB +                      << "From Succ: " << *TailBB);      TDBBs.push_back(PredBB); @@ -879,8 +893,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,        (!PriorTBB || PriorTBB == TailBB) &&        TailBB->pred_size() == 1 &&        !TailBB->hasAddressTaken()) { -    DEBUG(dbgs() << "\nMerging into block: " << *PrevBB -                 << "From MBB: " << *TailBB); +    LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB +                      << "From MBB: " << *TailBB);      // There may be a branch to the layout successor. This is unlikely but it      // happens. The correct thing to do is to remove the branch before      // duplicating the instructions in all cases. 
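The CFI handling introduced in shouldTailDuplicate() above is easier to audit when factored out as a predicate. The following is an illustrative sketch, not part of the patch: it assumes only the LLVM APIs already used in this hunk (MachineInstr::isNotDuplicable(), MachineInstr::isCFIInstruction(), TargetMachine::getTargetTriple()), and the helper name is hypothetical.

  #include "llvm/ADT/Triple.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/Target/TargetMachine.h"
  using namespace llvm;

  // True if MI prevents tail-duplicating TailBB. CFI instructions are marked
  // non-duplicable, but only Darwin compact unwind actually requires that;
  // under DWARF unwind they may be copied into predecessors.
  static bool blocksTailDuplication(const MachineInstr &MI,
                                    const MachineBasicBlock &TailBB) {
    if (!MI.isNotDuplicable())
      return false;
    const Triple &TT = TailBB.getParent()->getTarget().getTargetTriple();
    return TT.isOSDarwin() || !MI.isCFIInstruction();
  }

Read this way, the loop above bails out exactly when a non-duplicable instruction is either not a CFI instruction or the target uses Darwin compact unwind.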
@@ -985,7 +999,7 @@ void TailDuplicator::removeDeadBlock(      MachineBasicBlock *MBB,      function_ref<void(MachineBasicBlock *)> *RemovalCallback) {    assert(MBB->pred_empty() && "MBB must be dead!"); -  DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); +  LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);    if (RemovalCallback)      (*RemovalCallback)(MBB); diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index b2151eb49655..f0cfa2fbe4fd 100644 --- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -36,6 +36,13 @@ bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {    return Attr.getValueAsString() == "true";  } +bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const { +  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) && +         MF.getFunction().hasFnAttribute(Attribute::NoUnwind) && +         !MF.getFunction().hasFnAttribute(Attribute::UWTable)); +  return false; +} +  /// Returns the displacement from the frame register to the stack  /// frame of the specified index, along with the frame register used  /// (in output arg FrameReg). This is the default implementation which @@ -85,6 +92,19 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,    if (MF.getFunction().hasFnAttribute(Attribute::Naked))      return; +  // Noreturn+nounwind functions never restore CSR, so no saves are needed. +  // Purely noreturn functions may still return through throws, so those must +  // save CSR for caller exception handlers. +  // +  // If the function uses longjmp to break out of its current path of +  // execution we do not need the CSR spills either: setjmp stores all CSRs +  // it was called with into the jmp_buf, which longjmp then restores. +  if (MF.getFunction().hasFnAttribute(Attribute::NoReturn) && +        MF.getFunction().hasFnAttribute(Attribute::NoUnwind) && +        !MF.getFunction().hasFnAttribute(Attribute::UWTable) && +        enableCalleeSaveSkip(MF)) +    return; +    // Functions which call __builtin_unwind_init get all their registers saved.    bool CallsUnwindInit = MF.callsUnwindInit();    const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -100,7 +120,16 @@ unsigned TargetFrameLowering::getStackAlignmentSkew(    // When HHVM function is called, the stack is skewed as the return address    // is removed from the stack before we enter the function.    if (LLVM_UNLIKELY(MF.getFunction().getCallingConv() == CallingConv::HHVM)) -    return MF.getTarget().getPointerSize(); +    return MF.getTarget().getAllocaPointerSize();    return 0;  } + +int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { +  llvm_unreachable("getInitialCFAOffset() not implemented!"); +} + +unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) +    const { +  llvm_unreachable("getInitialCFARegister() not implemented!"); +}
\ No newline at end of file diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index bd90ed5b55b8..963f8178b509 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -174,6 +174,14 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,    bool Reg2IsUndef = MI.getOperand(Idx2).isUndef();    bool Reg1IsInternal = MI.getOperand(Idx1).isInternalRead();    bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead(); +  // Avoid calling isRenamable for virtual registers since we assert that +  // renamable property is only queried/set for physical registers. +  bool Reg1IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg1) +                             ? MI.getOperand(Idx1).isRenamable() +                             : false; +  bool Reg2IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg2) +                             ? MI.getOperand(Idx2).isRenamable() +                             : false;    // If destination is tied to either of the commuted source register, then    // it must be updated.    if (HasDef && Reg0 == Reg1 && @@ -211,6 +219,12 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,    CommutedMI->getOperand(Idx1).setIsUndef(Reg2IsUndef);    CommutedMI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal);    CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal); +  // Avoid calling setIsRenamable for virtual registers since we assert that +  // renamable property is only queried/set for physical registers. +  if (TargetRegisterInfo::isPhysicalRegister(Reg1)) +    CommutedMI->getOperand(Idx2).setIsRenamable(Reg1IsRenamable); +  if (TargetRegisterInfo::isPhysicalRegister(Reg2)) +    CommutedMI->getOperand(Idx1).setIsRenamable(Reg2IsRenamable);    return CommutedMI;  } diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index b29a33ac1c14..43f4bad595e3 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -28,7 +28,6 @@  #include "llvm/CodeGen/MachineMemOperand.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/RuntimeLibcalls.h"  #include "llvm/CodeGen/StackMaps.h"  #include "llvm/CodeGen/TargetLowering.h" @@ -50,6 +49,7 @@  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Target/TargetMachine.h"  #include <algorithm> @@ -118,7 +118,7 @@ static cl::opt<int> MinPercentageForPredictableBranch(  void TargetLoweringBase::InitLibcalls(const Triple &TT) {  #define HANDLE_LIBCALL(code, name) \    setLibcallName(RTLIB::code, name); -#include "llvm/CodeGen/RuntimeLibcalls.def" +#include "llvm/IR/RuntimeLibcalls.def"  #undef HANDLE_LIBCALL    // Initialize calling conventions to their default.    
for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC) @@ -192,6 +192,9 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {        return FPEXT_F64_F128;      else if (RetVT == MVT::ppcf128)        return FPEXT_F64_PPCF128; +  } else if (OpVT == MVT::f80) { +    if (RetVT == MVT::f128) +      return FPEXT_F80_F128;    }    return UNKNOWN_LIBCALL; @@ -227,6 +230,9 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {        return FPROUND_F128_F64;      if (OpVT == MVT::ppcf128)        return FPROUND_PPCF128_F64; +  } else if (RetVT == MVT::f80) { +    if (OpVT == MVT::f128) +      return FPROUND_F128_F80;    }    return UNKNOWN_LIBCALL; @@ -529,6 +535,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {    // Perform these initializations only once.    MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove =        MaxLoadsPerMemcmp = 8; +  MaxGluedStoresPerMemcpy = 0;    MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =        MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;    UseUnderscoreSetJmp = false; @@ -614,6 +621,12 @@ void TargetLoweringBase::initActions() {      setOperationAction(ISD::SUBCARRY, VT, Expand);      setOperationAction(ISD::SETCCCARRY, VT, Expand); +    // ADDC/ADDE/SUBC/SUBE default to expand. +    setOperationAction(ISD::ADDC, VT, Expand); +    setOperationAction(ISD::ADDE, VT, Expand); +    setOperationAction(ISD::SUBC, VT, Expand); +    setOperationAction(ISD::SUBE, VT, Expand); +      // These default to Expand so they will be expanded to CTLZ/CTTZ by default.      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); @@ -679,12 +692,13 @@ MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,    return MVT::getIntegerVT(8 * DL.getPointerSize(0));  } -EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, -                                         const DataLayout &DL) const { +EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL, +                                         bool LegalTypes) const {    assert(LHSTy.isInteger() && "Shift amount is not an integer type!");    if (LHSTy.isVector())      return LHSTy; -  return getScalarShiftAmountTy(DL, LHSTy); +  return LegalTypes ? 
getScalarShiftAmountTy(DL, LHSTy) +                    : getPointerTy(DL);  }  bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { @@ -979,6 +993,36 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,    return MBB;  } +MachineBasicBlock * +TargetLoweringBase::emitXRayCustomEvent(MachineInstr &MI, +                                        MachineBasicBlock *MBB) const { +  assert(MI.getOpcode() == TargetOpcode::PATCHABLE_EVENT_CALL && +         "Called emitXRayCustomEvent on the wrong MI!"); +  auto &MF = *MI.getMF(); +  auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc()); +  for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx) +    MIB.add(MI.getOperand(OpIdx)); + +  MBB->insert(MachineBasicBlock::iterator(MI), MIB); +  MI.eraseFromParent(); +  return MBB; +} + +MachineBasicBlock * +TargetLoweringBase::emitXRayTypedEvent(MachineInstr &MI, +                                       MachineBasicBlock *MBB) const { +  assert(MI.getOpcode() == TargetOpcode::PATCHABLE_TYPED_EVENT_CALL && +         "Called emitXRayTypedEvent on the wrong MI!"); +  auto &MF = *MI.getMF(); +  auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc()); +  for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx) +    MIB.add(MI.getOperand(OpIdx)); + +  MBB->insert(MachineBasicBlock::iterator(MI), MIB); +  MI.eraseFromParent(); +  return MBB; +} +  /// findRepresentativeClass - Return the largest legal super-reg register class  /// of the register class for the specified type and its associated "cost".  // This function is in TargetLowering because it uses RegClassForVT which would @@ -1587,13 +1631,16 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {  // Currently only support "standard" __stack_chk_guard.  // TODO: add LOAD_STACK_GUARD support.  void TargetLoweringBase::insertSSPDeclarations(Module &M) const { -  M.getOrInsertGlobal("__stack_chk_guard", Type::getInt8PtrTy(M.getContext())); +  if (!M.getNamedValue("__stack_chk_guard")) +    new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false, +                       GlobalVariable::ExternalLinkage, +                       nullptr, "__stack_chk_guard");  }  // Currently only support "standard" __stack_chk_guard.  // TODO: add LOAD_STACK_GUARD support.  Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { -  return M.getGlobalVariable("__stack_chk_guard", true); +  return M.getNamedValue("__stack_chk_guard");  }  Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { @@ -1683,7 +1730,7 @@ static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {      return TargetLoweringBase::ReciprocalEstimate::Unspecified;    SmallVector<StringRef, 4> OverrideVector; -  SplitString(Override, OverrideVector, ","); +  Override.split(OverrideVector, ',');    unsigned NumArgs = OverrideVector.size();    // Check if "all", "none", or "default" was specified. @@ -1743,7 +1790,7 @@ static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {      return TargetLoweringBase::ReciprocalEstimate::Unspecified;    SmallVector<StringRef, 4> OverrideVector; -  SplitString(Override, OverrideVector, ","); +  Override.split(OverrideVector, ',');    unsigned NumArgs = OverrideVector.size();    // Check if "all", "default", or "none" was specified. 
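Both reciprocal-estimate parsers above now tokenize their override string with StringRef::split instead of SplitString. A minimal standalone sketch of the new call, with a hypothetical function name and an illustrative input such as "all" or "sqrtf:1,divd:2"; note that split() keeps empty fragments by default (KeepEmpty = true) whereas SplitString skipped them, which matters if an override string could contain stray commas.

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringRef.h"

  // Split a comma-separated override list into its fragments, using the same
  // StringRef::split call that the patched code substitutes for SplitString.
  static llvm::SmallVector<llvm::StringRef, 4>
  tokenizeOverride(llvm::StringRef Override) {
    llvm::SmallVector<llvm::StringRef, 4> Fragments;
    Override.split(Fragments, ',');
    return Fragments;
  }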
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 24d4baa31e1f..b5dd2d4cca89 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -91,23 +91,86 @@ static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags,  //                                  ELF  //===----------------------------------------------------------------------===// -void TargetLoweringObjectFileELF::emitModuleMetadata( -    MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { +void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, +                                             const TargetMachine &TgtM) { +  TargetLoweringObjectFile::Initialize(Ctx, TgtM); +  TM = &TgtM; +} + +void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, +                                                     Module &M) const { +  auto &C = getContext(); + +  if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { +    auto *S = C.getELFSection(".linker-options", ELF::SHT_LLVM_LINKER_OPTIONS, +                              ELF::SHF_EXCLUDE); + +    Streamer.SwitchSection(S); + +    for (const auto &Operand : LinkerOptions->operands()) { +      if (cast<MDNode>(Operand)->getNumOperands() != 2) +        report_fatal_error("invalid llvm.linker.options"); +      for (const auto &Option : cast<MDNode>(Operand)->operands()) { +        Streamer.EmitBytes(cast<MDString>(Option)->getString()); +        Streamer.EmitIntValue(0, 1); +      } +    } +  } +    unsigned Version = 0;    unsigned Flags = 0;    StringRef Section;    GetObjCImageInfo(M, Version, Flags, Section); -  if (Section.empty()) +  if (!Section.empty()) { +    auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); +    Streamer.SwitchSection(S); +    Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); +    Streamer.EmitIntValue(Version, 4); +    Streamer.EmitIntValue(Flags, 4); +    Streamer.AddBlankLine(); +  } + +  SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; +  M.getModuleFlagsMetadata(ModuleFlags); + +  MDNode *CFGProfile = nullptr; + +  for (const auto &MFE : ModuleFlags) { +    StringRef Key = MFE.Key->getString(); +    if (Key == "CG Profile") { +      CFGProfile = cast<MDNode>(MFE.Val); +      break; +    } +  } + +  if (!CFGProfile)      return; -  auto &C = getContext(); -  auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); -  Streamer.SwitchSection(S); -  Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); -  Streamer.EmitIntValue(Version, 4); -  Streamer.EmitIntValue(Flags, 4); -  Streamer.AddBlankLine(); +  auto GetSym = [this](const MDOperand &MDO) -> MCSymbol * { +    if (!MDO) +      return nullptr; +    auto V = cast<ValueAsMetadata>(MDO); +    const Function *F = cast<Function>(V->getValue()); +    return TM->getSymbol(F); +  }; + +  for (const auto &Edge : CFGProfile->operands()) { +    MDNode *E = cast<MDNode>(Edge); +    const MCSymbol *From = GetSym(E->getOperand(0)); +    const MCSymbol *To = GetSym(E->getOperand(1)); +    // Skip null functions. This can happen if functions are dead stripped after +    // the CGProfile pass has been run. 
+    if (!From || !To) +      continue; +    uint64_t Count = cast<ConstantAsMetadata>(E->getOperand(2)) +                         ->getValue() +                         ->getUniqueInteger() +                         .getZExtValue(); +    Streamer.emitCGProfileEntry( +        MCSymbolRefExpr::create(From, MCSymbolRefExpr::VK_None, C), +        MCSymbolRefExpr::create(To, MCSymbolRefExpr::VK_None, C), Count); +  }  }  MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( @@ -170,7 +233,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(  }  static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { -  // N.B.: The defaults used in here are no the same ones used in MC. +  // N.B.: The defaults used in here are not the same ones used in MC.    // We follow gcc, MC follows gas. For example, given ".section .eh_frame",    // both gas and MC will produce a section with no flags. Given    // section(".eh_frame") gcc will produce: @@ -183,7 +246,7 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {    if (Name.empty() || Name[0] != '.') return K; -  // Some lame default implementation based on some magic section names. +  // Default implementation based on some magic section names.    if (Name == ".bss" ||        Name.startswith(".bss.") ||        Name.startswith(".gnu.linkonce.b.") || @@ -335,7 +398,8 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(        /*EntrySize=*/0, Group, UniqueID, AssociatedSymbol);    // Make sure that we did not get some other section with incompatible sh_link.    // This should not be possible due to UniqueID code above. -  assert(Section->getAssociatedSymbol() == AssociatedSymbol); +  assert(Section->getAssociatedSymbol() == AssociatedSymbol && +         "Associated symbol mismatch between sections");    return Section;  } @@ -617,8 +681,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,    }  } -void TargetLoweringObjectFileMachO::emitModuleMetadata( -    MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { +void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, +                                                       Module &M) const {    // Emit the linker options if present.    
if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {      for (const auto &Option : LinkerOptions->operands()) { @@ -727,6 +791,8 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(    if (GO->isWeakForLinker()) {      if (Kind.isReadOnly())        return ConstTextCoalSection; +    if (Kind.isReadOnlyWithRel()) +      return ConstDataCoalSection;      return DataCoalSection;    } @@ -1040,7 +1106,7 @@ MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(                                       Selection);  } -static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { +static StringRef getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {    if (Kind.isText())      return ".text";    if (Kind.isBSS()) @@ -1063,7 +1129,8 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(      EmitUniquedSection = TM.getDataSections();    if ((EmitUniquedSection && !Kind.isCommon()) || GO->hasComdat()) { -    const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); +    SmallString<256> Name = getCOFFSectionNameForUniqueGlobal(Kind); +      unsigned Characteristics = getCOFFSectionFlags(Kind, TM);      Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; @@ -1083,6 +1150,12 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(      if (!ComdatGV->hasPrivateLinkage()) {        MCSymbol *Sym = TM.getSymbol(ComdatGV);        StringRef COMDATSymName = Sym->getName(); + +      // Append "$symbol" to the section name when targeting mingw. The ld.bfd +      // COFF linker will not properly handle comdats otherwise. +      if (getTargetTriple().isWindowsGNUEnvironment()) +        raw_svector_ostream(Name) << '$' << COMDATSymName; +        return getContext().getCOFFSection(Name, Characteristics, Kind,                                           COMDATSymName, Selection, UniqueID);      } else { @@ -1140,17 +1213,18 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(    StringRef COMDATSymName = Sym->getName();    SectionKind Kind = SectionKind::getReadOnly(); -  const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); +  StringRef SecName = getCOFFSectionNameForUniqueGlobal(Kind);    unsigned Characteristics = getCOFFSectionFlags(Kind, TM);    Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;    unsigned UniqueID = NextUniqueID++; -  return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, -                                     COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID); +  return getContext().getCOFFSection( +      SecName, Characteristics, Kind, COMDATSymName, +      COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);  } -void TargetLoweringObjectFileCOFF::emitModuleMetadata( -    MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { +void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, +                                                      Module &M) const {    if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {      // Emit the linker options to the linker .drectve section.  
According to the      // spec, this section is a space-separated string containing flags for @@ -1250,19 +1324,136 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(    emitLinkerFlagsForGlobalCOFF(OS, GV, getTargetTriple(), getMangler());  } +void TargetLoweringObjectFileCOFF::emitLinkerFlagsForUsed( +    raw_ostream &OS, const GlobalValue *GV) const { +  emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler()); +} + +const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference( +    const GlobalValue *LHS, const GlobalValue *RHS, +    const TargetMachine &TM) const { +  const Triple &T = TM.getTargetTriple(); +  if (!T.isKnownWindowsMSVCEnvironment() && +      !T.isWindowsItaniumEnvironment() && +      !T.isWindowsCoreCLREnvironment()) +    return nullptr; + +  // Our symbols should exist in address space zero, cowardly no-op if +  // otherwise. +  if (LHS->getType()->getPointerAddressSpace() != 0 || +      RHS->getType()->getPointerAddressSpace() != 0) +    return nullptr; + +  // Both ptrtoint instructions must wrap global objects: +  // - Only global variables are eligible for image relative relocations. +  // - The subtrahend refers to the special symbol __ImageBase, a GlobalVariable. +  // We expect __ImageBase to be a global variable without a section, externally +  // defined. +  // +  // It should look something like this: @__ImageBase = external constant i8 +  if (!isa<GlobalObject>(LHS) || !isa<GlobalVariable>(RHS) || +      LHS->isThreadLocal() || RHS->isThreadLocal() || +      RHS->getName() != "__ImageBase" || !RHS->hasExternalLinkage() || +      cast<GlobalVariable>(RHS)->hasInitializer() || RHS->hasSection()) +    return nullptr; + +  return MCSymbolRefExpr::create(TM.getSymbol(LHS), +                                 MCSymbolRefExpr::VK_COFF_IMGREL32, +                                 getContext()); +} + +static std::string APIntToHexString(const APInt &AI) { +  unsigned Width = (AI.getBitWidth() / 8) * 2; +  std::string HexString = utohexstr(AI.getLimitedValue(), /*LowerCase=*/true); +  unsigned Size = HexString.size(); +  assert(Width >= Size && "hex string is too large!"); +  HexString.insert(HexString.begin(), Width - Size, '0'); + +  return HexString; +} + +static std::string scalarConstantToHexString(const Constant *C) { +  Type *Ty = C->getType(); +  if (isa<UndefValue>(C)) { +    return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits())); +  } else if (const auto *CFP = dyn_cast<ConstantFP>(C)) { +    return APIntToHexString(CFP->getValueAPF().bitcastToAPInt()); +  } else if (const auto *CI = dyn_cast<ConstantInt>(C)) { +    return APIntToHexString(CI->getValue()); +  } else { +    unsigned NumElements; +    if (isa<VectorType>(Ty)) +      NumElements = Ty->getVectorNumElements(); +    else +      NumElements = Ty->getArrayNumElements(); +    std::string HexString; +    for (int I = NumElements - 1, E = -1; I != E; --I) +      HexString += scalarConstantToHexString(C->getAggregateElement(I)); +    return HexString; +  } +} + +MCSection *TargetLoweringObjectFileCOFF::getSectionForConstant( +    const DataLayout &DL, SectionKind Kind, const Constant *C, +    unsigned &Align) const { +  if (Kind.isMergeableConst() && C && +      getContext().getAsmInfo()->hasCOFFComdatConstants()) { +    // This creates comdat sections with the given symbol name, but unless +    // AsmPrinter::GetCPISymbol actually makes the symbol global, the symbol +    // will be created with a null storage class, which makes GNU binutils +    // error 
out. +    const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | +                                     COFF::IMAGE_SCN_MEM_READ | +                                     COFF::IMAGE_SCN_LNK_COMDAT; +    std::string COMDATSymName; +    if (Kind.isMergeableConst4()) { +      if (Align <= 4) { +        COMDATSymName = "__real@" + scalarConstantToHexString(C); +        Align = 4; +      } +    } else if (Kind.isMergeableConst8()) { +      if (Align <= 8) { +        COMDATSymName = "__real@" + scalarConstantToHexString(C); +        Align = 8; +      } +    } else if (Kind.isMergeableConst16()) { +      // FIXME: These may not be appropriate for non-x86 architectures. +      if (Align <= 16) { +        COMDATSymName = "__xmm@" + scalarConstantToHexString(C); +        Align = 16; +      } +    } else if (Kind.isMergeableConst32()) { +      if (Align <= 32) { +        COMDATSymName = "__ymm@" + scalarConstantToHexString(C); +        Align = 32; +      } +    } + +    if (!COMDATSymName.empty()) +      return getContext().getCOFFSection(".rdata", Characteristics, Kind, +                                         COMDATSymName, +                                         COFF::IMAGE_COMDAT_SELECT_ANY); +  } + +  return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C, Align); +} + +  //===----------------------------------------------------------------------===//  //                                  Wasm  //===----------------------------------------------------------------------===// -static void checkWasmComdat(const GlobalValue *GV) { +static const Comdat *getWasmComdat(const GlobalValue *GV) {    const Comdat *C = GV->getComdat();    if (!C) -    return; +    return nullptr; -  // TODO(sbc): At some point we may need COMDAT support but currently -  // they are not supported. -  report_fatal_error("WebAssembly doesn't support COMDATs, '" + C->getName() + -                     "' cannot be lowered."); +  if (C->getSelectionKind() != Comdat::Any) +    report_fatal_error("WebAssembly COMDATs only support " +                       "SelectionKind::Any, '" + C->getName() + "' cannot be " +                       "lowered."); + +  return C;  }  static SectionKind getWasmKindForNamedSection(StringRef Name, SectionKind K) { @@ -1277,17 +1468,32 @@  MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(      const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { +  // We don't support explicit section names for functions in the wasm object +  // format.  Each function has to be in its own unique section. 
+  if (isa<Function>(GO)) { +    return SelectSectionForGlobal(GO, Kind, TM); +  } +    StringRef Name = GO->getSection(); -  checkWasmComdat(GO); +    Kind = getWasmKindForNamedSection(Name, Kind); -  return getContext().getWasmSection(Name, Kind); + +  StringRef Group = ""; +  if (const Comdat *C = getWasmComdat(GO)) { +    Group = C->getName(); +  } + +  return getContext().getWasmSection(Name, Kind, Group, +                                     MCContext::GenericSectionID);  }  static MCSectionWasm *selectWasmSectionForGlobal(      MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,      const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) {    StringRef Group = ""; -  checkWasmComdat(GO); +  if (const Comdat *C = getWasmComdat(GO)) { +    Group = C->getName(); +  }    bool UniqueSectionNames = TM.getUniqueSectionNames();    SmallString<128> Name = getSectionPrefixForGlobal(Kind); diff --git a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp index 3e6ad3eeef0f..3fca2f4ee4fe 100644 --- a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -41,6 +41,7 @@  #include "llvm/Support/Threading.h"  #include "llvm/Target/TargetMachine.h"  #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  #include "llvm/Transforms/Utils/SymbolRewriter.h"  #include <cassert>  #include <string> @@ -80,6 +81,9 @@ static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",      cl::desc("Disable Machine LICM"));  static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,      cl::desc("Disable Machine Sinking")); +static cl::opt<bool> DisablePostRAMachineSink("disable-postra-machine-sink", +    cl::Hidden, +    cl::desc("Disable PostRA Machine Sinking"));  static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,      cl::desc("Disable Loop Strength Reduction Pass"));  static cl::opt<bool> DisableConstantHoisting("disable-constant-hoisting", @@ -94,10 +98,9 @@ static cl::opt<bool> EnableImplicitNullChecks(      "enable-implicit-null-checks",      cl::desc("Fold null checks into faulting memory operations"),      cl::init(false), cl::Hidden); -static cl::opt<bool> -    EnableMergeICmps("enable-mergeicmps", -                     cl::desc("Merge ICmp chains into a single memcmp"), -                     cl::init(false), cl::Hidden); +static cl::opt<bool> DisableMergeICmps("disable-mergeicmps", +    cl::desc("Disable MergeICmps Pass"), +    cl::init(false), cl::Hidden);  static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,      cl::desc("Print LLVM IR produced by the loop-reduce pass"));  static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, @@ -108,14 +111,16 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,      cl::desc("Verify generated machine code"),      cl::init(false),      cl::ZeroOrMore); -static cl::opt<bool> EnableMachineOutliner("enable-machine-outliner", -    cl::Hidden, -    cl::desc("Enable machine outliner")); -static cl::opt<bool> EnableLinkOnceODROutlining( -    "enable-linkonceodr-outlining", -    cl::Hidden, -    cl::desc("Enable the machine outliner on linkonceodr functions"), -    cl::init(false)); +enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault }; +// Enable or disable the MachineOutliner. 
+static cl::opt<RunOutliner> EnableMachineOutliner( +    "enable-machine-outliner", cl::desc("Enable the machine outliner"), +    cl::Hidden, cl::ValueOptional, cl::init(TargetDefault), +    cl::values(clEnumValN(AlwaysOutline, "always", +                          "Run on all functions guaranteed to be beneficial"), +               clEnumValN(NeverOutline, "never", "Disable all outlining"), +               // Sentinel value for unspecified option. +               clEnumValN(AlwaysOutline, "", "")));  // Enable or disable FastISel. Both options are needed, because  // FastISel is enabled by default with -fast, and we wish to be  // able to enable or disable fast-isel independently from -O0. @@ -123,9 +128,9 @@ static cl::opt<cl::boolOrDefault>  EnableFastISelOption("fast-isel", cl::Hidden,    cl::desc("Enable the \"fast\" instruction selector")); -static cl::opt<cl::boolOrDefault> -    EnableGlobalISel("global-isel", cl::Hidden, -                     cl::desc("Enable the \"global\" instruction selector")); +static cl::opt<cl::boolOrDefault> EnableGlobalISelOption( +    "global-isel", cl::Hidden, +    cl::desc("Enable the \"global\" instruction selector"));  static cl::opt<std::string> PrintMachineInstrs(      "print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), @@ -226,7 +231,7 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,    if (StandardID == &TailDuplicateID)      return applyDisable(TargetID, DisableTailDuplicate); -  if (StandardID == &TargetPassConfig::EarlyTailDuplicateID) +  if (StandardID == &EarlyTailDuplicateID)      return applyDisable(TargetID, DisableEarlyTailDup);    if (StandardID == &MachineBlockPlacementID) @@ -241,18 +246,21 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,    if (StandardID == &EarlyIfConverterID)      return applyDisable(TargetID, DisableEarlyIfConversion); -  if (StandardID == &MachineLICMID) +  if (StandardID == &EarlyMachineLICMID)      return applyDisable(TargetID, DisableMachineLICM);    if (StandardID == &MachineCSEID)      return applyDisable(TargetID, DisableMachineCSE); -  if (StandardID == &TargetPassConfig::PostRAMachineLICMID) +  if (StandardID == &MachineLICMID)      return applyDisable(TargetID, DisablePostRAMachineLICM);    if (StandardID == &MachineSinkingID)      return applyDisable(TargetID, DisableMachineSink); +  if (StandardID == &PostRAMachineSinkingID) +    return applyDisable(TargetID, DisablePostRAMachineSink); +    if (StandardID == &MachineCopyPropagationID)      return applyDisable(TargetID, DisableCopyProp); @@ -267,10 +275,6 @@ INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",                  "Target Pass Configuration", false, false)  char TargetPassConfig::ID = 0; -// Pseudo Pass IDs. -char TargetPassConfig::EarlyTailDuplicateID = 0; -char TargetPassConfig::PostRAMachineLICMID = 0; -  namespace {  struct InsertedPass { @@ -366,10 +370,6 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)    initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());    initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); -  // Substitute Pseudo Pass IDs for real ones. -  substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); -  substitutePass(&PostRAMachineLICMID, &MachineLICMID); -    if (StringRef(PrintMachineInstrs.getValue()).equals(""))      TM.Options.PrintMachineCode = true; @@ -604,7 +604,7 @@ void TargetPassConfig::addIRPasses() {      // loads and compares. 
ExpandMemCmpPass then tries to expand those calls      // into optimally-sized loads and compares. The transforms are enabled by a      // target lowering hook. -    if (EnableMergeICmps) +    if (!DisableMergeICmps)        addPass(createMergeICmpsPass());      addPass(createExpandMemCmpPass());    } @@ -662,6 +662,14 @@ void TargetPassConfig::addPassesToHandleExceptions() {      addPass(createWinEHPass());      addPass(createDwarfEHPass());      break; +  case ExceptionHandling::Wasm: +    // Wasm EH uses Windows EH instructions, but it does not need to demote PHIs +    // on catchpads and cleanuppads because it does not outline them into +    // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we +    // should remove PHIs there. +    addPass(createWinEHPass(/*DemoteCatchSwitchPHIOnly=*/false)); +    addPass(createWasmEHPass()); +    break;    case ExceptionHandling::None:      addPass(createLowerInvokePass()); @@ -704,19 +712,18 @@ void TargetPassConfig::addISelPrepare() {  }  bool TargetPassConfig::addCoreISelPasses() { -  // Enable FastISel with -fast, but allow that to be overridden. +  // Enable FastISel with -fast-isel, but allow that to be overridden.    TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);    if (EnableFastISelOption == cl::BOU_TRUE ||        (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel()))      TM->setFastISel(true); -  // Ask the target for an isel. -  // Enable GlobalISel if the target wants to, but allow that to be overriden. +  // Ask the target for an instruction selector.    // Explicitly enabling fast-isel should override implicitly enabled    // global-isel. -  if (EnableGlobalISel == cl::BOU_TRUE || -      (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled() && -       EnableFastISelOption != cl::BOU_TRUE)) { +  if (EnableGlobalISelOption == cl::BOU_TRUE || +      (EnableGlobalISelOption == cl::BOU_UNSET && +       TM->Options.EnableGlobalISel && EnableFastISelOption != cl::BOU_TRUE)) {      TM->setFastISel(false);      if (addIRTranslator()) @@ -755,7 +762,7 @@ bool TargetPassConfig::addCoreISelPasses() {  }  bool TargetPassConfig::addISelPasses() { -  if (TM->Options.EmulatedTLS) +  if (TM->useEmulatedTLS())      addPass(createLowerEmuTLSPass());    addPass(createPreISelIntrinsicLoweringPass()); @@ -844,8 +851,10 @@ void TargetPassConfig::addMachinePasses() {    addPostRegAlloc();    // Insert prolog/epilog code.  Eliminate abstract frame index references... -  if (getOptLevel() != CodeGenOpt::None) +  if (getOptLevel() != CodeGenOpt::None) { +    addPass(&PostRAMachineSinkingID);      addPass(&ShrinkWrapID); +  }    // Prolog/Epilog inserter needs a TargetMachine to instantiate. But only    // do so if it hasn't been disabled, substituted, or overridden. @@ -904,8 +913,14 @@ void TargetPassConfig::addMachinePasses() {    addPass(&XRayInstrumentationID, false);    addPass(&PatchableFunctionID, false); -  if (EnableMachineOutliner) -    PM->add(createMachineOutlinerPass(EnableLinkOnceODROutlining)); +  if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None && +      EnableMachineOutliner != NeverOutline) { +    bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline); +    bool AddOutliner = RunOnAllFunctions || +                       TM->Options.SupportsDefaultOutlining; +    if (AddOutliner) +      addPass(createMachineOutlinerPass(RunOnAllFunctions)); +  }    // Add passes that directly emit MI after all other MI passes.    
addPreEmitPass2(); @@ -941,7 +956,7 @@ void TargetPassConfig::addMachineSSAOptimization() {    // loop info, just like LICM and CSE below.    addILPOpts(); -  addPass(&MachineLICMID, false); +  addPass(&EarlyMachineLICMID, false);    addPass(&MachineCSEID, false);    addPass(&MachineSinkingID); @@ -1090,10 +1105,14 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {      // kill markers.      addPass(&StackSlotColoringID); +    // Copy propagate to forward register uses and try to eliminate COPYs that +    // were not coalesced. +    addPass(&MachineCopyPropagationID); +      // Run post-ra machine LICM to hoist reloads / remats.      //      // FIXME: can this move into MachineLateOptimization? -    addPass(&PostRAMachineLICMID); +    addPass(&MachineLICMID);    }  } @@ -1135,18 +1154,13 @@ void TargetPassConfig::addBlockPlacement() {  //===---------------------------------------------------------------------===//  /// GlobalISel Configuration  //===---------------------------------------------------------------------===// - -bool TargetPassConfig::isGlobalISelEnabled() const { -  return false; -} -  bool TargetPassConfig::isGlobalISelAbortEnabled() const {    if (EnableGlobalISelAbort.getNumOccurrences() > 0)      return EnableGlobalISelAbort == 1;    // When no abort behaviour is specified, we don't abort if the target says    // that GISel is enabled. -  return !isGlobalISelEnabled(); +  return !TM->Options.EnableGlobalISel;  }  bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const { diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp index f03c3b8300f3..661dc18f7a85 100644 --- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -19,15 +19,16 @@  #include "llvm/CodeGen/MachineFrameInfo.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h"  #include "llvm/CodeGen/TargetFrameLowering.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/IR/Attributes.h"  #include "llvm/IR/Function.h"  #include "llvm/MC/MCRegisterInfo.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/MachineValueType.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/Printable.h"  #include "llvm/Support/raw_ostream.h" @@ -86,18 +87,24 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,  namespace llvm {  Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI, -                   unsigned SubIdx) { -  return Printable([Reg, TRI, SubIdx](raw_ostream &OS) { +                   unsigned SubIdx, const MachineRegisterInfo *MRI) { +  return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) {      if (!Reg) -      OS << "%noreg"; +      OS << "$noreg";      else if (TargetRegisterInfo::isStackSlot(Reg))        OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); -    else if (TargetRegisterInfo::isVirtualRegister(Reg)) -      OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); +    else if (TargetRegisterInfo::isVirtualRegister(Reg)) { +      StringRef Name = MRI ? 
MRI->getVRegName(Reg) : ""; +      if (Name != "") { +        OS << '%' << Name; +      } else { +        OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); +      } +    }      else if (!TRI) -      OS << '%' << "physreg" << Reg; +      OS << '$' << "physreg" << Reg;      else if (Reg < TRI->getNumRegs()) { -      OS << '%'; +      OS << '$';        printLowerCase(TRI->getName(Reg), OS);      } else        llvm_unreachable("Register kind is unsupported."); @@ -338,7 +345,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,    return BestRC;  } -/// \brief Check if the registers defined by the pair (RegisterClass, SubReg) +/// Check if the registers defined by the pair (RegisterClass, SubReg)  /// share the same register file.  static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,                                    const TargetRegisterClass *DefRC, @@ -436,7 +443,8 @@ bool TargetRegisterInfo::needsStackRealignment(    if (F.hasFnAttribute("stackrealign") || requiresRealignment) {      if (canRealignStack(MF))        return true; -    DEBUG(dbgs() << "Can't realign function's stack: " << F.getName() << "\n"); +    LLVM_DEBUG(dbgs() << "Can't realign function's stack: " << F.getName() +                      << "\n");    }    return false;  } @@ -450,6 +458,51 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,    return true;  } +unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg, +                                         const MachineRegisterInfo &MRI) const { +  const TargetRegisterClass *RC{}; +  if (isPhysicalRegister(Reg)) { +    // The size is not directly available for physical registers. +    // Instead, we need to access a register class that contains Reg and +    // get the size of that register class. +    RC = getMinimalPhysRegClass(Reg); +  } else { +    LLT Ty = MRI.getType(Reg); +    unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0; +    // If Reg is not a generic register, query the register class to +    // get its size. +    if (RegSize) +      return RegSize; +    // Since Reg is not a generic register, it must have a register class. 
+    RC = MRI.getRegClass(Reg); +  } +  assert(RC && "Unable to deduce the register class"); +  return getRegSizeInBits(*RC); +} + +unsigned +TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg, +                                     const MachineRegisterInfo *MRI) const { +  while (true) { +    const MachineInstr *MI = MRI->getVRegDef(SrcReg); +    if (!MI->isCopyLike()) +      return SrcReg; + +    unsigned CopySrcReg; +    if (MI->isCopy()) +      CopySrcReg = MI->getOperand(1).getReg(); +    else { +      assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike"); +      CopySrcReg = MI->getOperand(2).getReg(); +    } + +    if (!isVirtualRegister(CopySrcReg)) +      return CopySrcReg; + +    SrcReg = CopySrcReg; +  } +} +  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)  LLVM_DUMP_METHOD  void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp index 86dbf1b2aeab..3cff31ad4933 100644 --- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -61,12 +61,10 @@ static unsigned lcm(unsigned A, unsigned B) {    return LCM;  } -void TargetSchedModel::init(const MCSchedModel &sm, -                            const TargetSubtargetInfo *sti, -                            const TargetInstrInfo *tii) { -  SchedModel = sm; -  STI = sti; -  TII = tii; +void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) { +  STI = TSInfo; +  SchedModel = TSInfo->getSchedModel(); +  TII = TSInfo->getInstrInfo();    STI->initInstrItins(InstrItins);    unsigned NumRes = SchedModel.getNumProcResourceKinds(); @@ -257,31 +255,19 @@ unsigned TargetSchedModel::computeOperandLatency(  unsigned  TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const { -  unsigned Latency = 0; -  for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries; -       DefIdx != DefEnd; ++DefIdx) { -    // Lookup the definition's write latency in SubtargetInfo. 
-    const MCWriteLatencyEntry *WLEntry = -      STI->getWriteLatencyEntry(&SCDesc, DefIdx); -    Latency = std::max(Latency, capLatency(WLEntry->Cycles)); -  } -  return Latency; +  return capLatency(MCSchedModel::computeInstrLatency(*STI, SCDesc));  }  unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {    assert(hasInstrSchedModel() && "Only call this function with a SchedModel"); -    unsigned SCIdx = TII->get(Opcode).getSchedClass(); -  const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx); - -  if (SCDesc->isValid() && !SCDesc->isVariant()) -    return computeInstrLatency(*SCDesc); +  return capLatency(SchedModel.computeInstrLatency(*STI, SCIdx)); +} -  if (SCDesc->isValid()) { -    assert (!SCDesc->isVariant() && "No MI sched latency: SCDesc->isVariant()"); -    return computeInstrLatency(*SCDesc); -  } -  return 0; +unsigned TargetSchedModel::computeInstrLatency(const MCInst &Inst) const { +  if (hasInstrSchedModel()) +    return capLatency(SchedModel.computeInstrLatency(*STI, *TII, Inst)); +  return computeInstrLatency(Inst.getOpcode());  }  unsigned @@ -336,71 +322,39 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,    return 0;  } -static Optional<double> -getRThroughputFromItineraries(unsigned schedClass, -                              const InstrItineraryData *IID){ -  Optional<double> Throughput; - -  for (const InstrStage *IS = IID->beginStage(schedClass), -                        *E = IID->endStage(schedClass); -       IS != E; ++IS) { -    if (IS->getCycles()) { -      double Temp = countPopulation(IS->getUnits()) * 1.0 / IS->getCycles(); -      Throughput = Throughput.hasValue() -                        ? std::min(Throughput.getValue(), Temp) -                        : Temp; -    } -  } -  if (Throughput.hasValue()) -    // We need reciprocal throughput that's why we return such value. -    return 1 / Throughput.getValue(); -  return Throughput; -} - -static Optional<double> -getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc, -                                  const TargetSubtargetInfo *STI, -                                  const MCSchedModel &SchedModel) { -  Optional<double> Throughput; - -  for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc), -                                 *WEnd = STI->getWriteProcResEnd(SCDesc); -       WPR != WEnd; ++WPR) { -    if (WPR->Cycles) { -      unsigned NumUnits = -          SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits; -      double Temp = NumUnits * 1.0 / WPR->Cycles; -      Throughput = Throughput.hasValue() -                       ? std::min(Throughput.getValue(), Temp) -                       : Temp; -    } +double +TargetSchedModel::computeReciprocalThroughput(const MachineInstr *MI) const { +  if (hasInstrItineraries()) { +    unsigned SchedClass = MI->getDesc().getSchedClass(); +    return MCSchedModel::getReciprocalThroughput(SchedClass, +                                                 *getInstrItineraries());    } -  if (Throughput.hasValue()) -    // We need reciprocal throughput that's why we return such value. 
-    return 1 / Throughput.getValue(); -  return Throughput; -} -Optional<double> -TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const { -  if (hasInstrItineraries()) -    return getRThroughputFromItineraries(MI->getDesc().getSchedClass(), -                                         getInstrItineraries());    if (hasInstrSchedModel()) -    return getRThroughputFromInstrSchedModel(resolveSchedClass(MI), STI, -                                             SchedModel); -  return Optional<double>(); +    return MCSchedModel::getReciprocalThroughput(*STI, *resolveSchedClass(MI)); + +  return 0.0;  } -Optional<double> -TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const { +double +TargetSchedModel::computeReciprocalThroughput(unsigned Opcode) const {    unsigned SchedClass = TII->get(Opcode).getSchedClass();    if (hasInstrItineraries()) -    return getRThroughputFromItineraries(SchedClass, getInstrItineraries()); +    return MCSchedModel::getReciprocalThroughput(SchedClass, +                                                 *getInstrItineraries());    if (hasInstrSchedModel()) { -    const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); -    if (SCDesc->isValid() && !SCDesc->isVariant()) -      return getRThroughputFromInstrSchedModel(SCDesc, STI, SchedModel); +    const MCSchedClassDesc &SCDesc = *SchedModel.getSchedClassDesc(SchedClass); +    if (SCDesc.isValid() && !SCDesc.isVariant()) +      return MCSchedModel::getReciprocalThroughput(*STI, SCDesc);    } -  return Optional<double>(); + +  return 0.0;  } + +double +TargetSchedModel::computeReciprocalThroughput(const MCInst &MI) const { +  if (hasInstrSchedModel()) +    return SchedModel.getReciprocalThroughput(*STI, *TII, MI); +  return computeReciprocalThroughput(MI.getOpcode()); +} + diff --git a/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp index 8693f344f9be..fa29c05fd6c2 100644 --- a/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -67,18 +67,15 @@ bool TargetSubtargetInfo::useAA() const {    return false;  } -static std::string createSchedInfoStr(unsigned Latency, -                                     Optional<double> RThroughput) { +static std::string createSchedInfoStr(unsigned Latency, double RThroughput) {    static const char *SchedPrefix = " sched: [";    std::string Comment;    raw_string_ostream CS(Comment); -  if (Latency > 0 && RThroughput.hasValue()) -    CS << SchedPrefix << Latency << format(":%2.2f", RThroughput.getValue()) +  if (RThroughput != 0.0) +    CS << SchedPrefix << Latency << format(":%2.2f", RThroughput)         << "]"; -  else if (Latency > 0) +  else      CS << SchedPrefix << Latency << ":?]"; -  else if (RThroughput.hasValue()) -    CS << SchedPrefix << "?:" << RThroughput.getValue() << "]";    CS.flush();    return Comment;  } @@ -90,9 +87,9 @@ std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {    // We don't cache TSchedModel because it depends on TargetInstrInfo    // that could be changed during the compilation    TargetSchedModel TSchedModel; -  TSchedModel.init(getSchedModel(), this, getInstrInfo()); +  TSchedModel.init(this);    unsigned Latency = TSchedModel.computeInstrLatency(&MI); -  Optional<double> RThroughput = TSchedModel.computeInstrRThroughput(&MI); +  double RThroughput = TSchedModel.computeReciprocalThroughput(&MI);    return createSchedInfoStr(Latency, RThroughput);  } @@ -101,17 +98,19 @@ 
std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {    // We don't cache TSchedModel because it depends on TargetInstrInfo    // that could be changed during the compilation    TargetSchedModel TSchedModel; -  TSchedModel.init(getSchedModel(), this, getInstrInfo()); +  TSchedModel.init(this);    unsigned Latency;    if (TSchedModel.hasInstrSchedModel()) -    Latency = TSchedModel.computeInstrLatency(MCI.getOpcode()); +    Latency = TSchedModel.computeInstrLatency(MCI);    else if (TSchedModel.hasInstrItineraries()) {      auto *ItinData = TSchedModel.getInstrItineraries();      Latency = ItinData->getStageLatency(          getInstrInfo()->get(MCI.getOpcode()).getSchedClass());    } else      return std::string(); -  Optional<double> RThroughput = -      TSchedModel.computeInstrRThroughput(MCI.getOpcode()); +  double RThroughput = TSchedModel.computeReciprocalThroughput(MCI);    return createSchedInfoStr(Latency, RThroughput);  } + +void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { +} diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 774b76f84b7f..0ca435016ead 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -290,8 +290,8 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,    unsigned NumVisited = 0;    for (MachineInstr &OtherMI : make_range(std::next(OldPos), KillPos)) { -    // DBG_VALUE cannot be counted against the limit. -    if (OtherMI.isDebugValue()) +    // Debug instructions cannot be counted against the limit. +    if (OtherMI.isDebugInstr())        continue;      if (NumVisited > 30)  // FIXME: Arbitrary limit to reduce compile time cost.        return false; @@ -685,15 +685,15 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,                                                     unsigned RegCIdx,                                                     unsigned Dist) {    unsigned RegC = MI->getOperand(RegCIdx).getReg(); -  DEBUG(dbgs() << "2addr: COMMUTING  : " << *MI); +  LLVM_DEBUG(dbgs() << "2addr: COMMUTING  : " << *MI);    MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx);    if (NewMI == nullptr) { -    DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); +    LLVM_DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");      return false;    } -  DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI); +  LLVM_DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);    assert(NewMI == MI &&           "TargetInstrInfo::commuteInstruction() should not return a new "           "instruction unless it was requested."); @@ -740,8 +740,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,    if (!NewMI)      return false; -  DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); -  DEBUG(dbgs() << "2addr:         TO 3-ADDR: " << *NewMI); +  LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); +  LLVM_DEBUG(dbgs() << "2addr:         TO 3-ADDR: " << *NewMI);    bool Sunk = false;    if (LIS) @@ -940,8 +940,8 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,    MachineBasicBlock::iterator KillPos = KillMI;    ++KillPos;    for (MachineInstr &OtherMI : make_range(End, KillPos)) { -    // DBG_VALUE cannot be counted against the limit. -    if (OtherMI.isDebugValue()) +    // Debug instructions cannot be counted against the limit. 
+    if (OtherMI.isDebugInstr())        continue;      if (NumVisited > 10)  // FIXME: Arbitrary limit to reduce compile time cost.        return false; @@ -985,7 +985,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,    }    // Move debug info as well. -  while (Begin != MBB->begin() && std::prev(Begin)->isDebugValue()) +  while (Begin != MBB->begin() && std::prev(Begin)->isDebugInstr())      --Begin;    nmi = End; @@ -1014,7 +1014,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,      LV->addVirtualRegisterKilled(Reg, *MI);    } -  DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); +  LLVM_DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);    return true;  } @@ -1114,8 +1114,8 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,    unsigned NumVisited = 0;    for (MachineInstr &OtherMI :         make_range(mi, MachineBasicBlock::iterator(KillMI))) { -    // DBG_VALUE cannot be counted against the limit. -    if (OtherMI.isDebugValue()) +    // Debug instructions cannot be counted against the limit. +    if (OtherMI.isDebugInstr())        continue;      if (NumVisited > 10)  // FIXME: Arbitrary limit to reduce compile time cost.        return false; @@ -1162,11 +1162,11 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,    // Move the old kill above MI, don't forget to move debug info as well.    MachineBasicBlock::iterator InsertPos = mi; -  while (InsertPos != MBB->begin() && std::prev(InsertPos)->isDebugValue()) +  while (InsertPos != MBB->begin() && std::prev(InsertPos)->isDebugInstr())      --InsertPos;    MachineBasicBlock::iterator From = KillMI;    MachineBasicBlock::iterator To = std::next(From); -  while (std::prev(From)->isDebugValue()) +  while (std::prev(From)->isDebugInstr())      --From;    MBB->splice(InsertPos, MBB, From, To); @@ -1181,7 +1181,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,      LV->addVirtualRegisterKilled(Reg, *MI);    } -  DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); +  LLVM_DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);    return true;  } @@ -1205,6 +1205,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,    if (!MI->isCommutable())      return false; +  bool MadeChange = false;    unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg();    unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg();    unsigned OpsNum = MI->getDesc().getNumOperands(); @@ -1223,8 +1224,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,      // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp      // operands. This makes the live ranges of DstOp and OtherOp joinable. -    bool DoCommute = -        !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false); +    bool OtherOpKilled = isKilled(*MI, OtherOpReg, MRI, TII, LIS, false); +    bool DoCommute = !BaseOpKilled && OtherOpKilled;      if (!DoCommute &&          isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) { @@ -1235,13 +1236,21 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,      // If it's profitable to commute, try to do so.      if (DoCommute && commuteInstruction(MI, DstOpIdx, BaseOpIdx, OtherOpIdx,                                          Dist)) { +      MadeChange = true;        ++NumCommuted; -      if (AggressiveCommute) +      if (AggressiveCommute) {          ++NumAggrCommuted; -      return true; +        // There might be more than two commutable operands, update BaseOp and +        // continue scanning. 
+        BaseOpReg = OtherOpReg; +        BaseOpKilled = OtherOpKilled; +        continue; +      } +      // If this was a commute based on kill, we won't do better continuing. +      return MadeChange;      }    } -  return false; +  return MadeChange;  }  /// For the case where an instruction has a single pair of tied register @@ -1343,7 +1352,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,        const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);        if (UnfoldMCID.getNumDefs() == 1) {          // Unfold the load. -        DEBUG(dbgs() << "2addr:   UNFOLDING: " << MI); +        LLVM_DEBUG(dbgs() << "2addr:   UNFOLDING: " << MI);          const TargetRegisterClass *RC =            TRI->getAllocatableClass(              TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); @@ -1352,7 +1361,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,          if (!TII->unfoldMemoryOperand(*MF, MI, Reg,                                        /*UnfoldLoad=*/true,                                        /*UnfoldStore=*/false, NewMIs)) { -          DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); +          LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");            return false;          }          assert(NewMIs.size() == 2 && @@ -1365,8 +1374,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,          MBB->insert(mi, NewMIs[0]);          MBB->insert(mi, NewMIs[1]); -        DEBUG(dbgs() << "2addr:    NEW LOAD: " << *NewMIs[0] -                     << "2addr:    NEW INST: " << *NewMIs[1]); +        LLVM_DEBUG(dbgs() << "2addr:    NEW LOAD: " << *NewMIs[0] +                          << "2addr:    NEW INST: " << *NewMIs[1]);          // Transform the instruction, now that it no longer has a load.          unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); @@ -1431,7 +1440,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,            // Transforming didn't eliminate the tie and didn't lead to an            // improvement. Clean up the unfolded instructions and keep the            // original. -          DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); +          LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");            NewMIs[0]->eraseFromParent();            NewMIs[1]->eraseFromParent();          } @@ -1475,7 +1484,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {            MRI->constrainRegClass(DstReg, RC);        SrcMO.setReg(DstReg);        SrcMO.setSubReg(0); -      DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); +      LLVM_DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);        continue;      }      TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx)); @@ -1574,7 +1583,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,        }      } -    DEBUG(dbgs() << "\t\tprepend:\t" << *MIB); +    LLVM_DEBUG(dbgs() << "\t\tprepend:\t" << *MIB);      MachineOperand &MO = MI->getOperand(SrcIdx);      assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && @@ -1668,9 +1677,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {    bool MadeChange = false; -  DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); -  DEBUG(dbgs() << "********** Function: " -        << MF->getName() << '\n'); +  LLVM_DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); +  LLVM_DEBUG(dbgs() << "********** Function: " << MF->getName() << '\n');    // This pass takes the function out of SSA form.    
MRI->leaveSSA(); @@ -1690,7 +1698,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {        MachineBasicBlock::iterator nmi = std::next(mi);        // Don't revisit an instruction previously converted by target. It may        // contain undef register operands (%noreg), which are not handled. -      if (mi->isDebugValue() || SunkInstrs.count(&*mi)) { +      if (mi->isDebugInstr() || SunkInstrs.count(&*mi)) {          mi = nmi;          continue;        } @@ -1713,7 +1721,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {        ++NumTwoAddressInstrs;        MadeChange = true; -      DEBUG(dbgs() << '\t' << *mi); +      LLVM_DEBUG(dbgs() << '\t' << *mi);        // If the instruction has a single pair of tied operands, try some        // transformations that may either eliminate the tied operands or @@ -1740,7 +1748,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {        // Now iterate over the information collected above.        for (auto &TO : TiedOperands) {          processTiedPairs(&*mi, TO.second, Dist); -        DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); +        LLVM_DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);        }        // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. @@ -1754,7 +1762,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {          mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef());          mi->RemoveOperand(1);          mi->setDesc(TII->get(TargetOpcode::COPY)); -        DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); +        LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);        }        // Clear TiedOperands here instead of at the top of the loop @@ -1787,7 +1795,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {    if (MI.getOperand(0).getSubReg() ||        TargetRegisterInfo::isPhysicalRegister(DstReg) ||        !(MI.getNumOperands() & 1)) { -    DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI); +    LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);      llvm_unreachable(nullptr);    } @@ -1838,19 +1846,19 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {      if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))        LV->replaceKillInstruction(SrcReg, MI, *CopyMI); -    DEBUG(dbgs() << "Inserted: " << *CopyMI); +    LLVM_DEBUG(dbgs() << "Inserted: " << *CopyMI);    }    MachineBasicBlock::iterator EndMBBI =        std::next(MachineBasicBlock::iterator(MI));    if (!DefEmitted) { -    DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF"); +    LLVM_DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF");      MI.setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));      for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)        MI.RemoveOperand(j);    } else { -    DEBUG(dbgs() << "Eliminated: " << MI); +    LLVM_DEBUG(dbgs() << "Eliminated: " << MI);      MI.eraseFromParent();    } diff --git a/contrib/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm/lib/CodeGen/ValueTypes.cpp new file mode 100644 index 000000000000..adb7075de651 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ValueTypes.cpp @@ -0,0 +1,321 @@ +//===----------- ValueTypes.cpp - Implementation of EVT methods -----------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +EVT EVT::changeExtendedTypeToInteger() const { +  LLVMContext &Context = LLVMTy->getContext(); +  return getIntegerVT(Context, getSizeInBits()); +} + +EVT EVT::changeExtendedVectorElementTypeToInteger() const { +  LLVMContext &Context = LLVMTy->getContext(); +  EVT IntTy = getIntegerVT(Context, getScalarSizeInBits()); +  return getVectorVT(Context, IntTy, getVectorNumElements()); +} + +EVT EVT::getExtendedIntegerVT(LLVMContext &Context, unsigned BitWidth) { +  EVT VT; +  VT.LLVMTy = IntegerType::get(Context, BitWidth); +  assert(VT.isExtended() && "Type is not extended!"); +  return VT; +} + +EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, +                             unsigned NumElements) { +  EVT ResultVT; +  ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), NumElements); +  assert(ResultVT.isExtended() && "Type is not extended!"); +  return ResultVT; +} + +bool EVT::isExtendedFloatingPoint() const { +  assert(isExtended() && "Type is not extended!"); +  return LLVMTy->isFPOrFPVectorTy(); +} + +bool EVT::isExtendedInteger() const { +  assert(isExtended() && "Type is not extended!"); +  return LLVMTy->isIntOrIntVectorTy(); +} + +bool EVT::isExtendedScalarInteger() const { +  assert(isExtended() && "Type is not extended!"); +  return LLVMTy->isIntegerTy(); +} + +bool EVT::isExtendedVector() const { +  assert(isExtended() && "Type is not extended!"); +  return LLVMTy->isVectorTy(); +} + +bool EVT::isExtended16BitVector() const { +  return isExtendedVector() && getExtendedSizeInBits() == 16; +} + +bool EVT::isExtended32BitVector() const { +  return isExtendedVector() && getExtendedSizeInBits() == 32; +} + +bool EVT::isExtended64BitVector() const { +  return isExtendedVector() && getExtendedSizeInBits() == 64; +} + +bool EVT::isExtended128BitVector() const { +  return isExtendedVector() && getExtendedSizeInBits() == 128; +} + +bool EVT::isExtended256BitVector() const { +  return isExtendedVector() && getExtendedSizeInBits() == 256; +} + +bool EVT::isExtended512BitVector() const { +  return isExtendedVector() && getExtendedSizeInBits() == 512; +} + +bool EVT::isExtended1024BitVector() const { +  return isExtendedVector() && getExtendedSizeInBits() == 1024; +} + +bool EVT::isExtended2048BitVector() const { +  return isExtendedVector() && getExtendedSizeInBits() == 2048; +} + +EVT EVT::getExtendedVectorElementType() const { +  assert(isExtended() && "Type is not extended!"); +  return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType()); +} + +unsigned EVT::getExtendedVectorNumElements() const { +  assert(isExtended() && "Type is not extended!"); +  return cast<VectorType>(LLVMTy)->getNumElements(); +} + +unsigned EVT::getExtendedSizeInBits() const { +  assert(isExtended() && "Type is not extended!"); +  if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy)) +    return ITy->getBitWidth(); +  if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy)) +    return VTy->getBitWidth(); +  llvm_unreachable("Unrecognized extended type!"); +} + +/// getEVTString - This function returns value type as a string, e.g. "i32". 
+std::string EVT::getEVTString() const { +  switch (V.SimpleTy) { +  default: +    if (isVector()) +      return "v" + utostr(getVectorNumElements()) + +             getVectorElementType().getEVTString(); +    if (isInteger()) +      return "i" + utostr(getSizeInBits()); +    llvm_unreachable("Invalid EVT!"); +  case MVT::i1:      return "i1"; +  case MVT::i8:      return "i8"; +  case MVT::i16:     return "i16"; +  case MVT::i32:     return "i32"; +  case MVT::i64:     return "i64"; +  case MVT::i128:    return "i128"; +  case MVT::f16:     return "f16"; +  case MVT::f32:     return "f32"; +  case MVT::f64:     return "f64"; +  case MVT::f80:     return "f80"; +  case MVT::f128:    return "f128"; +  case MVT::ppcf128: return "ppcf128"; +  case MVT::isVoid:  return "isVoid"; +  case MVT::Other:   return "ch"; +  case MVT::Glue:    return "glue"; +  case MVT::x86mmx:  return "x86mmx"; +  case MVT::v1i1:    return "v1i1"; +  case MVT::v2i1:    return "v2i1"; +  case MVT::v4i1:    return "v4i1"; +  case MVT::v8i1:    return "v8i1"; +  case MVT::v16i1:   return "v16i1"; +  case MVT::v32i1:   return "v32i1"; +  case MVT::v64i1:   return "v64i1"; +  case MVT::v128i1:  return "v128i1"; +  case MVT::v512i1:  return "v512i1"; +  case MVT::v1024i1: return "v1024i1"; +  case MVT::v1i8:    return "v1i8"; +  case MVT::v2i8:    return "v2i8"; +  case MVT::v4i8:    return "v4i8"; +  case MVT::v8i8:    return "v8i8"; +  case MVT::v16i8:   return "v16i8"; +  case MVT::v32i8:   return "v32i8"; +  case MVT::v64i8:   return "v64i8"; +  case MVT::v128i8:  return "v128i8"; +  case MVT::v256i8:  return "v256i8"; +  case MVT::v1i16:   return "v1i16"; +  case MVT::v2i16:   return "v2i16"; +  case MVT::v4i16:   return "v4i16"; +  case MVT::v8i16:   return "v8i16"; +  case MVT::v16i16:  return "v16i16"; +  case MVT::v32i16:  return "v32i16"; +  case MVT::v64i16:  return "v64i16"; +  case MVT::v128i16: return "v128i16"; +  case MVT::v1i32:   return "v1i32"; +  case MVT::v2i32:   return "v2i32"; +  case MVT::v4i32:   return "v4i32"; +  case MVT::v8i32:   return "v8i32"; +  case MVT::v16i32:  return "v16i32"; +  case MVT::v32i32:  return "v32i32"; +  case MVT::v64i32:  return "v64i32"; +  case MVT::v1i64:   return "v1i64"; +  case MVT::v2i64:   return "v2i64"; +  case MVT::v4i64:   return "v4i64"; +  case MVT::v8i64:   return "v8i64"; +  case MVT::v16i64:  return "v16i64"; +  case MVT::v32i64:  return "v32i64"; +  case MVT::v1i128:  return "v1i128"; +  case MVT::v1f32:   return "v1f32"; +  case MVT::v2f32:   return "v2f32"; +  case MVT::v2f16:   return "v2f16"; +  case MVT::v4f16:   return "v4f16"; +  case MVT::v8f16:   return "v8f16"; +  case MVT::v4f32:   return "v4f32"; +  case MVT::v8f32:   return "v8f32"; +  case MVT::v16f32:  return "v16f32"; +  case MVT::v1f64:   return "v1f64"; +  case MVT::v2f64:   return "v2f64"; +  case MVT::v4f64:   return "v4f64"; +  case MVT::v8f64:   return "v8f64"; +  case MVT::Metadata:return "Metadata"; +  case MVT::Untyped: return "Untyped"; +  case MVT::ExceptRef: return "ExceptRef"; +  } +} + +/// getTypeForEVT - This method returns an LLVM type corresponding to the +/// specified EVT.  For integer types, this returns an unsigned type.  Note +/// that this will abort for types that cannot be represented. 
+Type *EVT::getTypeForEVT(LLVMContext &Context) const { +  switch (V.SimpleTy) { +  default: +    assert(isExtended() && "Type is not extended!"); +    return LLVMTy; +  case MVT::isVoid:  return Type::getVoidTy(Context); +  case MVT::i1:      return Type::getInt1Ty(Context); +  case MVT::i8:      return Type::getInt8Ty(Context); +  case MVT::i16:     return Type::getInt16Ty(Context); +  case MVT::i32:     return Type::getInt32Ty(Context); +  case MVT::i64:     return Type::getInt64Ty(Context); +  case MVT::i128:    return IntegerType::get(Context, 128); +  case MVT::f16:     return Type::getHalfTy(Context); +  case MVT::f32:     return Type::getFloatTy(Context); +  case MVT::f64:     return Type::getDoubleTy(Context); +  case MVT::f80:     return Type::getX86_FP80Ty(Context); +  case MVT::f128:    return Type::getFP128Ty(Context); +  case MVT::ppcf128: return Type::getPPC_FP128Ty(Context); +  case MVT::x86mmx:  return Type::getX86_MMXTy(Context); +  case MVT::v1i1:    return VectorType::get(Type::getInt1Ty(Context), 1); +  case MVT::v2i1:    return VectorType::get(Type::getInt1Ty(Context), 2); +  case MVT::v4i1:    return VectorType::get(Type::getInt1Ty(Context), 4); +  case MVT::v8i1:    return VectorType::get(Type::getInt1Ty(Context), 8); +  case MVT::v16i1:   return VectorType::get(Type::getInt1Ty(Context), 16); +  case MVT::v32i1:   return VectorType::get(Type::getInt1Ty(Context), 32); +  case MVT::v64i1:   return VectorType::get(Type::getInt1Ty(Context), 64); +  case MVT::v128i1:  return VectorType::get(Type::getInt1Ty(Context), 128); +  case MVT::v512i1:  return VectorType::get(Type::getInt1Ty(Context), 512); +  case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024); +  case MVT::v1i8:    return VectorType::get(Type::getInt8Ty(Context), 1); +  case MVT::v2i8:    return VectorType::get(Type::getInt8Ty(Context), 2); +  case MVT::v4i8:    return VectorType::get(Type::getInt8Ty(Context), 4); +  case MVT::v8i8:    return VectorType::get(Type::getInt8Ty(Context), 8); +  case MVT::v16i8:   return VectorType::get(Type::getInt8Ty(Context), 16); +  case MVT::v32i8:   return VectorType::get(Type::getInt8Ty(Context), 32); +  case MVT::v64i8:   return VectorType::get(Type::getInt8Ty(Context), 64); +  case MVT::v128i8:  return VectorType::get(Type::getInt8Ty(Context), 128); +  case MVT::v256i8:  return VectorType::get(Type::getInt8Ty(Context), 256); +  case MVT::v1i16:   return VectorType::get(Type::getInt16Ty(Context), 1); +  case MVT::v2i16:   return VectorType::get(Type::getInt16Ty(Context), 2); +  case MVT::v4i16:   return VectorType::get(Type::getInt16Ty(Context), 4); +  case MVT::v8i16:   return VectorType::get(Type::getInt16Ty(Context), 8); +  case MVT::v16i16:  return VectorType::get(Type::getInt16Ty(Context), 16); +  case MVT::v32i16:  return VectorType::get(Type::getInt16Ty(Context), 32); +  case MVT::v64i16:  return VectorType::get(Type::getInt16Ty(Context), 64); +  case MVT::v128i16: return VectorType::get(Type::getInt16Ty(Context), 128); +  case MVT::v1i32:   return VectorType::get(Type::getInt32Ty(Context), 1); +  case MVT::v2i32:   return VectorType::get(Type::getInt32Ty(Context), 2); +  case MVT::v4i32:   return VectorType::get(Type::getInt32Ty(Context), 4); +  case MVT::v8i32:   return VectorType::get(Type::getInt32Ty(Context), 8); +  case MVT::v16i32:  return VectorType::get(Type::getInt32Ty(Context), 16); +  case MVT::v32i32:  return VectorType::get(Type::getInt32Ty(Context), 32); +  case MVT::v64i32:  return VectorType::get(Type::getInt32Ty(Context), 64); +  
case MVT::v1i64:   return VectorType::get(Type::getInt64Ty(Context), 1); +  case MVT::v2i64:   return VectorType::get(Type::getInt64Ty(Context), 2); +  case MVT::v4i64:   return VectorType::get(Type::getInt64Ty(Context), 4); +  case MVT::v8i64:   return VectorType::get(Type::getInt64Ty(Context), 8); +  case MVT::v16i64:  return VectorType::get(Type::getInt64Ty(Context), 16); +  case MVT::v32i64:  return VectorType::get(Type::getInt64Ty(Context), 32); +  case MVT::v1i128:  return VectorType::get(Type::getInt128Ty(Context), 1); +  case MVT::v2f16:   return VectorType::get(Type::getHalfTy(Context), 2); +  case MVT::v4f16:   return VectorType::get(Type::getHalfTy(Context), 4); +  case MVT::v8f16:   return VectorType::get(Type::getHalfTy(Context), 8); +  case MVT::v1f32:   return VectorType::get(Type::getFloatTy(Context), 1); +  case MVT::v2f32:   return VectorType::get(Type::getFloatTy(Context), 2); +  case MVT::v4f32:   return VectorType::get(Type::getFloatTy(Context), 4); +  case MVT::v8f32:   return VectorType::get(Type::getFloatTy(Context), 8); +  case MVT::v16f32:   return VectorType::get(Type::getFloatTy(Context), 16); +  case MVT::v1f64:   return VectorType::get(Type::getDoubleTy(Context), 1); +  case MVT::v2f64:   return VectorType::get(Type::getDoubleTy(Context), 2); +  case MVT::v4f64:   return VectorType::get(Type::getDoubleTy(Context), 4); +  case MVT::v8f64:   return VectorType::get(Type::getDoubleTy(Context), 8); +  case MVT::Metadata: return Type::getMetadataTy(Context); + } +} + +/// Return the value type corresponding to the specified type.  This returns all +/// pointers as MVT::iPTR.  If HandleUnknown is true, unknown types are returned +/// as Other, otherwise they are invalid. +MVT MVT::getVT(Type *Ty, bool HandleUnknown){ +  switch (Ty->getTypeID()) { +  default: +    if (HandleUnknown) return MVT(MVT::Other); +    llvm_unreachable("Unknown type!"); +  case Type::VoidTyID: +    return MVT::isVoid; +  case Type::IntegerTyID: +    return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth()); +  case Type::HalfTyID:      return MVT(MVT::f16); +  case Type::FloatTyID:     return MVT(MVT::f32); +  case Type::DoubleTyID:    return MVT(MVT::f64); +  case Type::X86_FP80TyID:  return MVT(MVT::f80); +  case Type::X86_MMXTyID:   return MVT(MVT::x86mmx); +  case Type::FP128TyID:     return MVT(MVT::f128); +  case Type::PPC_FP128TyID: return MVT(MVT::ppcf128); +  case Type::PointerTyID:   return MVT(MVT::iPTR); +  case Type::VectorTyID: { +    VectorType *VTy = cast<VectorType>(Ty); +    return getVectorVT( +      getVT(VTy->getElementType(), false), VTy->getNumElements()); +  } +  } +} + +/// getEVT - Return the value type corresponding to the specified type.  This +/// returns all pointers as MVT::iPTR.  If HandleUnknown is true, unknown types +/// are returned as Other, otherwise they are invalid. 
+EVT EVT::getEVT(Type *Ty, bool HandleUnknown){ +  switch (Ty->getTypeID()) { +  default: +    return MVT::getVT(Ty, HandleUnknown); +  case Type::IntegerTyID: +    return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth()); +  case Type::VectorTyID: { +    VectorType *VTy = cast<VectorType>(Ty); +    return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false), +                       VTy->getNumElements()); +  } +  } +} diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index 13f7e83f3dd0..0ead2b8340ab 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -35,6 +35,7 @@  #include "llvm/CodeGen/TargetOpcodes.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h"  #include "llvm/MC/LaneBitmask.h"  #include "llvm/Pass.h"  #include "llvm/Support/Compiler.h" @@ -241,10 +242,9 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {    Indexes = &getAnalysis<SlotIndexes>();    LIS = &getAnalysis<LiveIntervals>();    VRM = &getAnalysis<VirtRegMap>(); -  DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" -               << "********** Function: " -               << MF->getName() << '\n'); -  DEBUG(VRM->dump()); +  LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" +                    << "********** Function: " << MF->getName() << '\n'); +  LLVM_DEBUG(VRM->dump());    // Add kill flags while we still have virtual registers.    LIS->addKillFlags(VRM); @@ -376,7 +376,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {  void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {    if (!MI.isIdentityCopy())      return; -  DEBUG(dbgs() << "Identity copy: " << MI); +  LLVM_DEBUG(dbgs() << "Identity copy: " << MI);    ++NumIdCopies;    // Copies like: @@ -387,14 +387,14 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {    // instruction to maintain this information.    if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) {      MI.setDesc(TII->get(TargetOpcode::KILL)); -    DEBUG(dbgs() << "  replace by: " << MI); +    LLVM_DEBUG(dbgs() << "  replace by: " << MI);      return;    }    if (Indexes)      Indexes->removeSingleMachineInstrFromMaps(MI);    MI.eraseFromBundle(); -  DEBUG(dbgs() << "  deleted.\n"); +  LLVM_DEBUG(dbgs() << "  deleted.\n");  }  /// The liverange splitting logic sometimes produces bundles of copies when @@ -406,6 +406,8 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {      return;    if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) { +    SmallVector<MachineInstr *, 2> MIs({&MI}); +      // Only do this when the complete bundle is made out of COPYs.      
MachineBasicBlock &MBB = *MI.getParent();      for (MachineBasicBlock::reverse_instr_iterator I = @@ -413,16 +415,53 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {           I != E && I->isBundledWithSucc(); ++I) {        if (!I->isCopy())          return; +      MIs.push_back(&*I); +    } +    MachineInstr *FirstMI = MIs.back(); + +    auto anyRegsAlias = [](const MachineInstr *Dst, +                           ArrayRef<MachineInstr *> Srcs, +                           const TargetRegisterInfo *TRI) { +      for (const MachineInstr *Src : Srcs) +        if (Src != Dst) +          if (TRI->regsOverlap(Dst->getOperand(0).getReg(), +                               Src->getOperand(1).getReg())) +            return true; +      return false; +    }; + +    // If any of the destination registers in the bundle of copies alias any of +    // the source registers, try to schedule the instructions to avoid any +    // clobbering. +    for (int E = MIs.size(), PrevE = E; E > 1; PrevE = E) { +      for (int I = E; I--; ) +        if (!anyRegsAlias(MIs[I], makeArrayRef(MIs).take_front(E), TRI)) { +          if (I + 1 != E) +            std::swap(MIs[I], MIs[E - 1]); +          --E; +        } +      if (PrevE == E) { +        MF->getFunction().getContext().emitError( +            "register rewriting failed: cycle in copy bundle"); +        break; +      }      } -    for (MachineBasicBlock::reverse_instr_iterator I = MI.getReverseIterator(); -         I->isBundledWithPred(); ) { -      MachineInstr &MI = *I; -      ++I; +    MachineInstr *BundleStart = FirstMI; +    for (MachineInstr *BundledMI : llvm::reverse(MIs)) { +      // If instruction is in the middle of the bundle, move it before the +      // bundle starts, otherwise, just unbundle it. When we get to the last +      // instruction, the bundle will have been completely undone. +      if (BundledMI != BundleStart) { +        BundledMI->removeFromBundle(); +        MBB.insert(FirstMI, BundledMI); +      } else if (BundledMI->isBundledWithSucc()) { +        BundledMI->unbundleFromSucc(); +        BundleStart = &*std::next(BundledMI->getIterator()); +      } -      MI.unbundleFromPred(); -      if (Indexes) -        Indexes->insertMachineInstrInMaps(MI); +      if (Indexes && BundledMI != FirstMI) +        Indexes->insertMachineInstrInMaps(*BundledMI);      }    }  } @@ -461,7 +500,7 @@ void VirtRegRewriter::rewrite() {    for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();         MBBI != MBBE; ++MBBI) { -    DEBUG(MBBI->print(dbgs(), Indexes)); +    LLVM_DEBUG(MBBI->print(dbgs(), Indexes));      for (MachineBasicBlock::instr_iterator             MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {        MachineInstr *MI = &*MII; @@ -530,7 +569,7 @@ void VirtRegRewriter::rewrite() {          // Rewrite. Note we could have used MachineOperand::substPhysReg(), but          // we need the inlining here.          
MO.setReg(PhysReg);
-        MO.setIsRenamableIfNoExtraRegAllocReq();
+        MO.setIsRenamable(true);
       }

       // Add any missing super-register kills after rewriting the whole
@@ -544,7 +583,7 @@ void VirtRegRewriter::rewrite() {
       while (!SuperDefs.empty())
         MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);

-      DEBUG(dbgs() << "> " << *MI);
+      LLVM_DEBUG(dbgs() << "> " << *MI);

       expandCopyBundle(*MI);
diff --git a/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp
new file mode 100644
index 000000000000..83d04da5dd0c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -0,0 +1,374 @@
+//===-- WasmEHPrepare - Prepare exception handling for WebAssembly -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use the
+// WebAssembly exception handling scheme.
+//
+// WebAssembly exception handling uses Windows exception IR as its mid-level
+// representation. This pass does the following transformation for every
+// catchpad block:
+// (In C-style pseudocode)
+//
+// - Before:
+//   catchpad ...
+//   exn = wasm.get.exception();
+//   selector = wasm.get.ehselector();
+//   ...
+//
+// - After:
+//   catchpad ...
+//   exn = wasm.catch(0); // 0 is a tag for C++
+//   wasm.landingpad.index(index);
+//   // Only add the lines below if it is not a single catch (...)
+//   __wasm_lpad_context.lpad_index = index;
+//   __wasm_lpad_context.lsda = wasm.lsda();
+//   _Unwind_CallPersonality(exn);
+//   int selector = __wasm_lpad_context.selector;
+//   ...
+//
+// It also does the following for a cleanuppad block with a call to
+// __clang_call_terminate():
+// - Before:
+//   cleanuppad ...
+//   exn = wasm.get.exception();
+//   __clang_call_terminate(exn);
+//
+// - After:
+//   cleanuppad ...
+//   exn = wasm.catch(0); // 0 is a tag for C++
+//   __clang_call_terminate(exn);
+//
+//
+// * Background: WebAssembly EH instructions
+// WebAssembly's try and catch instructions are structured as follows:
+// try
+//   instruction*
+// catch (C++ tag)
+//   instruction*
+// ...
+// catch_all
+//   instruction*
+// try_end
+//
+// A catch instruction in WebAssembly does not correspond to a C++ catch
+// clause. In WebAssembly, there is a single catch instruction for all C++
+// exceptions. There can be more catch instructions for exceptions in other
+// languages, but they are not generated for now. catch_all catches all
+// exceptions including foreign exceptions (e.g. JavaScript). We turn
+// catchpads into catch (C++ tag) and cleanuppads into catch_all, with one
+// exception: a cleanuppad with a call to __clang_call_terminate should be in
+// both catch (C++ tag) and catch_all.
+//
+//
+// * Background: Direct personality function call
+// In WebAssembly EH, the VM is responsible for unwinding the stack once an
+// exception is thrown. After the stack is unwound, the control flow is
+// transferred to the WebAssembly 'catch' instruction, which returns a caught
+// exception object.
+//
+// Unwinding the stack is not done by libunwind but by the VM, so the
+// personality function in libcxxabi cannot be called from libunwind during
+// the unwinding process.
+// So after a catch instruction, we insert a call to a wrapper function in
+// libunwind that in turn calls the real personality function.
+//
+// In Itanium EH, if the personality function decides there is no matching
+// catch clause in a call frame and no cleanup action to perform, the unwinder
+// doesn't stop there and continues unwinding. But in Wasm EH, the unwinder
+// stops at every call frame with a catch instruction, after which the
+// personality function is called from the compiler-generated user code here.
+//
+// In libunwind, we have this struct that serves as a communication channel
+// between the compiler-generated user code and the personality function in
+// libcxxabi.
+//
+// struct _Unwind_LandingPadContext {
+//   uintptr_t lpad_index;
+//   uintptr_t lsda;
+//   uintptr_t selector;
+// };
+// struct _Unwind_LandingPadContext __wasm_lpad_context = ...;
+//
+// And this wrapper in libunwind calls the personality function.
+//
+// _Unwind_Reason_Code _Unwind_CallPersonality(void *exception_ptr) {
+//   struct _Unwind_Exception *exception_obj =
+//       (struct _Unwind_Exception *)exception_ptr;
+//   _Unwind_Reason_Code ret = __gxx_personality_v0(
+//       1, _UA_CLEANUP_PHASE, exception_obj->exception_class, exception_obj,
+//       (struct _Unwind_Context *)&__wasm_lpad_context);
+//   return ret;
+// }
+//
+// We pass a landing pad index and the address of the LSDA for the current
+// function to the wrapper function _Unwind_CallPersonality in libunwind, and
+// we retrieve the selector after it returns.
+//
+//===----------------------------------------------------------------------===//

+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "wasmehprepare"
+
+namespace {
+class WasmEHPrepare : public FunctionPass {
+  Type *LPadContextTy = nullptr; // type of 'struct _Unwind_LandingPadContext'
+  GlobalVariable *LPadContextGV = nullptr; // __wasm_lpad_context
+
+  // Field addresses of struct _Unwind_LandingPadContext
+  Value *LPadIndexField = nullptr; // lpad_index field
+  Value *LSDAField = nullptr;      // lsda field
+  Value *SelectorField = nullptr;  // selector field
+
+  Function *CatchF = nullptr;           // wasm.catch() intrinsic
+  Function *LPadIndexF = nullptr;       // wasm.landingpad.index() intrinsic
+  Function *LSDAF = nullptr;            // wasm.lsda() intrinsic
+  Function *GetExnF = nullptr;          // wasm.get.exception() intrinsic
+  Function *GetSelectorF = nullptr;     // wasm.get.ehselector() intrinsic
+  Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper
+  Function *ClangCallTermF = nullptr;   // __clang_call_terminate() function
+
+  void prepareEHPad(BasicBlock *BB, unsigned Index);
+  void prepareTerminateCleanupPad(BasicBlock *BB);
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  WasmEHPrepare() : FunctionPass(ID) {}
+
+  bool doInitialization(Module &M) override;
+  bool runOnFunction(Function &F) override;
+
+  StringRef getPassName() const override {
+    return "WebAssembly Exception handling preparation";
+  }
+};
+} // end anonymous namespace
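(An aside on the scheme described in the header comment above: the struct and the wrapper it quotes assemble into a small, self-contained model of the compiler/personality protocol. The following is only an illustrative sketch with a stubbed personality routine; it is not part of this patch, of libunwind, or of libcxxabi.)

#include <cstdint>
#include <cstdio>

// The communication channel described above, as defined in libunwind.
struct _Unwind_LandingPadContext {
  uintptr_t lpad_index;
  uintptr_t lsda;
  uintptr_t selector;
};
static _Unwind_LandingPadContext __wasm_lpad_context; // zero-initialized

// Stand-in for __gxx_personality_v0: a real personality matches the thrown
// exception against the LSDA named by __wasm_lpad_context.lsda and writes the
// resulting filter value into __wasm_lpad_context.selector.
static void stubPersonality(void *ExnObj) {
  (void)ExnObj;
  __wasm_lpad_context.selector = 1; // pretend the first catch clause matched
}

// The sequence the pass emits at a catchpad, in runnable form.
static int landingPadSequence(void *Exn, uintptr_t Index, uintptr_t Lsda) {
  __wasm_lpad_context.lpad_index = Index; // wasm.landingpad.index(Index)
  __wasm_lpad_context.lsda = Lsda;        // wasm.lsda()
  stubPersonality(Exn);                   // _Unwind_CallPersonality(Exn)
  return static_cast<int>(__wasm_lpad_context.selector);
}

int main() {
  int FakeExn = 0;
  std::printf("selector = %d\n", landingPadSequence(&FakeExn, 0, 0));
  return 0;
}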
+char WasmEHPrepare::ID = 0;
+INITIALIZE_PASS(WasmEHPrepare, DEBUG_TYPE, "Prepare WebAssembly exceptions",
+                false, false)
+
+FunctionPass *llvm::createWasmEHPass() { return new WasmEHPrepare(); }
+
+bool WasmEHPrepare::doInitialization(Module &M) {
+  IRBuilder<> IRB(M.getContext());
+  LPadContextTy = StructType::get(IRB.getInt32Ty(),   // lpad_index
+                                  IRB.getInt8PtrTy(), // lsda
+                                  IRB.getInt32Ty()    // selector
+  );
+  return false;
+}
+
+bool WasmEHPrepare::runOnFunction(Function &F) {
+  SmallVector<BasicBlock *, 16> CatchPads;
+  SmallVector<BasicBlock *, 16> CleanupPads;
+  for (BasicBlock &BB : F) {
+    if (!BB.isEHPad())
+      continue;
+    auto *Pad = BB.getFirstNonPHI();
+    if (isa<CatchPadInst>(Pad))
+      CatchPads.push_back(&BB);
+    else if (isa<CleanupPadInst>(Pad))
+      CleanupPads.push_back(&BB);
+  }
+
+  if (CatchPads.empty() && CleanupPads.empty())
+    return false;
+  assert(F.hasPersonalityFn() && "Personality function not found");
+
+  Module &M = *F.getParent();
+  IRBuilder<> IRB(F.getContext());
+
+  // __wasm_lpad_context global variable
+  LPadContextGV = cast<GlobalVariable>(
+      M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy));
+  LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0,
+                                          "lpad_index_gep");
+  LSDAField =
+      IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 1, "lsda_gep");
+  SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2,
+                                         "selector_gep");
+
+  // wasm.catch() intrinsic, which will be lowered to the wasm 'catch'
+  // instruction.
+  CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
+  // wasm.landingpad.index() intrinsic, which specifies the landing pad index.
+  LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index);
+  // wasm.lsda() intrinsic. Returns the address of the LSDA table for the
+  // current function.
+  LSDAF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_lsda);
+  // wasm.get.exception() and wasm.get.ehselector() intrinsics. Calls to these
+  // are generated in clang.
+  GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception);
+  GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector);
+
+  // _Unwind_CallPersonality() wrapper function, which calls the personality
+  // function.
+  CallPersonalityF = cast<Function>(M.getOrInsertFunction(
+      "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy()));
+  CallPersonalityF->setDoesNotThrow();
+
+  // __clang_call_terminate() function, which is inserted by clang in case a
+  // cleanup throws.
+  ClangCallTermF = M.getFunction("__clang_call_terminate");
+
+  unsigned Index = 0;
+  for (auto *BB : CatchPads) {
+    auto *CPI = cast<CatchPadInst>(BB->getFirstNonPHI());
+    // In case of a single catch (...), we don't need to emit an LSDA
+    if (CPI->getNumArgOperands() == 1 &&
+        cast<Constant>(CPI->getArgOperand(0))->isNullValue())
+      prepareEHPad(BB, -1);
+    else
+      prepareEHPad(BB, Index++);
+  }
+
+  if (!ClangCallTermF)
+    return !CatchPads.empty();
+
+  // Cleanuppads will turn into catch_all later, but cleanuppads with a call
+  // to __clang_call_terminate() are a special case. __clang_call_terminate()
+  // takes an exception object, so we have to duplicate the call in both
+  // 'catch <C++ tag>' and 'catch_all' clauses.
+  // Here we only insert a call to catch; the duplication will be done later.
+  // In catch_all, the exception object will be set to null.
+  for (auto *BB : CleanupPads)
+    for (auto &I : *BB)
+      if (auto *CI = dyn_cast<CallInst>(&I))
+        if (CI->getCalledValue() == ClangCallTermF)
+          prepareEHPad(BB, -1);
+
+  return true;
+}
+
+void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
+  assert(BB->isEHPad() && "BB is not an EHPad!");
+  IRBuilder<> IRB(BB->getContext());
+
+  IRB.SetInsertPoint(&*BB->getFirstInsertionPt());
+  // The argument to wasm.catch() is the tag for C++ exceptions, which we set
+  // to 0 for this module.
+  // Pseudocode: void *exn = wasm.catch(0);
+  Instruction *Exn = IRB.CreateCall(CatchF, IRB.getInt32(0), "exn");
+  // Replace the return value of wasm.get.exception() with the return value
+  // from wasm.catch().
+  auto *FPI = cast<FuncletPadInst>(BB->getFirstNonPHI());
+  Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr;
+  for (auto &U : FPI->uses()) {
+    if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
+      if (CI->getCalledValue() == GetExnF)
+        GetExnCI = CI;
+      else if (CI->getCalledValue() == GetSelectorF)
+        GetSelectorCI = CI;
+    }
+  }
+
+  assert(GetExnCI && "wasm.get.exception() call does not exist");
+  GetExnCI->replaceAllUsesWith(Exn);
+  GetExnCI->eraseFromParent();
+
+  // In case it is a catchpad with a single catch (...) or a cleanuppad, we
+  // don't need to call the personality function because we don't need a
+  // selector.
+  if (FPI->getNumArgOperands() == 0 ||
+      (FPI->getNumArgOperands() == 1 &&
+       cast<Constant>(FPI->getArgOperand(0))->isNullValue())) {
+    if (GetSelectorCI) {
+      assert(GetSelectorCI->use_empty() &&
+             "wasm.get.ehselector() still has uses!");
+      GetSelectorCI->eraseFromParent();
+    }
+    return;
+  }
+  IRB.SetInsertPoint(Exn->getNextNode());
+
+  // This is to create a map of <landingpad EH label, landingpad index> in
+  // SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables.
+  // Pseudocode: wasm.landingpad.index(Index);
+  IRB.CreateCall(LPadIndexF, IRB.getInt32(Index));
+
+  // Pseudocode: __wasm_lpad_context.lpad_index = index;
+  IRB.CreateStore(IRB.getInt32(Index), LPadIndexField);
+
+  // Store the LSDA address only if this catchpad belongs to a top-level
+  // catchswitch. If there is another catchpad that dominates this pad, we
+  // don't need to store the LSDA address again, because it is the same
+  // throughout the function and has already been stored before.
+  // TODO Can we not store the LSDA address in the user function but make
+  // libcxxabi compute it?
+  auto *CPI = cast<CatchPadInst>(FPI);
+  if (isa<ConstantTokenNone>(CPI->getCatchSwitch()->getParentPad()))
+    // Pseudocode: __wasm_lpad_context.lsda = wasm.lsda();
+    IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
+
+  // Pseudocode: _Unwind_CallPersonality(exn);
+  CallInst *PersCI =
+      IRB.CreateCall(CallPersonalityF, Exn, OperandBundleDef("funclet", CPI));
+  PersCI->setDoesNotThrow();
+
+  // Pseudocode: int selector = __wasm_lpad_context.selector;
+  Instruction *Selector = IRB.CreateLoad(SelectorField, "selector");
+
+  // Replace the return value from wasm.get.ehselector() with the selector
+  // value loaded from __wasm_lpad_context.selector.
+  assert(GetSelectorCI && "wasm.get.ehselector() call does not exist");
+  GetSelectorCI->replaceAllUsesWith(Selector);
+  GetSelectorCI->eraseFromParent();
+}
+
+void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
+  for (const auto &BB : *F) {
+    if (!BB.isEHPad())
+      continue;
+    const Instruction *Pad = BB.getFirstNonPHI();
+
+    // If an exception is not caught by a catchpad (i.e., it is a foreign
+    // exception), it will unwind to its parent catchswitch's unwind
+    // destination. We don't record an unwind destination for cleanuppads
+    // because every exception should be caught by them.
+    if (const auto *CatchPad = dyn_cast<CatchPadInst>(Pad)) {
+      const auto *UnwindBB = CatchPad->getCatchSwitch()->getUnwindDest();
+      if (!UnwindBB)
+        continue;
+      const Instruction *UnwindPad = UnwindBB->getFirstNonPHI();
+      if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad))
+        // Currently there should be only one handler per catchswitch.
+        EHInfo.setEHPadUnwindDest(&BB, *CatchSwitch->handlers().begin());
+      else // cleanuppad
+        EHInfo.setEHPadUnwindDest(&BB, UnwindBB);
+    }
+  }
+
+  // Record the unwind destination for invoke and cleanupret instructions.
+  for (const auto &BB : *F) {
+    const Instruction *TI = BB.getTerminator();
+    BasicBlock *UnwindBB = nullptr;
+    if (const auto *Invoke = dyn_cast<InvokeInst>(TI))
+      UnwindBB = Invoke->getUnwindDest();
+    else if (const auto *CleanupRet = dyn_cast<CleanupReturnInst>(TI))
+      UnwindBB = CleanupRet->getUnwindDest();
+    if (!UnwindBB)
+      continue;
+    const Instruction *UnwindPad = UnwindBB->getFirstNonPHI();
+    if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad))
+      // Currently there should be only one handler per catchswitch.
+      EHInfo.setThrowUnwindDest(&BB, *CatchSwitch->handlers().begin());
+    else // cleanuppad
+      EHInfo.setThrowUnwindDest(&BB, UnwindBB);
+  }
+}
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index 0b16a113640d..e629c13f133f 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
@@ -31,7 +32,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"

 using namespace llvm;

@@ -41,7 +41,7 @@ using namespace llvm;
 static cl::opt<bool> DisableDemotion(
     "disable-demotion", cl::Hidden,
     cl::desc(
-        "Clone multicolor basic blocks but do not demote cross funclet values"),
+        "Clone multicolor basic blocks but do not demote cross-scope values"),
     cl::init(false));

 static cl::opt<bool> DisableCleanups(
@@ -49,12 +49,17 @@
     cl::desc("Do not remove implausible terminators or other similar cleanups"),
     cl::init(false));

+static cl::opt<bool> DemoteCatchSwitchPHIOnlyOpt(
+    "demote-catchswitch-only", cl::Hidden,
+    cl::desc("Demote catchswitch BBs only (for wasm EH)"), cl::init(false));
+
 namespace {

 class WinEHPrepare : public FunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid.

-  WinEHPrepare() : FunctionPass(ID) {}
+  WinEHPrepare(bool DemoteCatchSwitchPHIOnly = false)
+      : FunctionPass(ID), DemoteCatchSwitchPHIOnly(DemoteCatchSwitchPHIOnly) {}

   bool runOnFunction(Function &Fn) override;

@@ -77,12 +82,14 @@ private:
   bool prepareExplicitEH(Function &F);
   void colorFunclets(Function &F);

-  void demotePHIsOnFunclets(Function &F);
+  void demotePHIsOnFunclets(Function &F, bool DemoteCatchSwitchPHIOnly);
   void cloneCommonBlocks(Function &F);
   void removeImplausibleInstructions(Function &F);
   void cleanupPreparedFunclets(Function &F);
   void verifyPreparedFunclets(Function &F);

+  bool DemoteCatchSwitchPHIOnly;
+
   // All fields are reset by runOnFunction.
   EHPersonality Personality = EHPersonality::Unknown;

@@ -97,7 +104,9 @@ char WinEHPrepare::ID = 0;
 INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions",
                 false, false)

-FunctionPass *llvm::createWinEHPass() { return new WinEHPrepare(); }
+FunctionPass *llvm::createWinEHPass(bool DemoteCatchSwitchPHIOnly) {
+  return new WinEHPrepare(DemoteCatchSwitchPHIOnly);
+}

 bool WinEHPrepare::runOnFunction(Function &Fn) {
   if (!Fn.hasPersonalityFn())
@@ -106,8 +115,8 @@ bool WinEHPrepare::runOnFunction(Function &Fn) {
   // Classify the personality to see what kind of preparation we need.
   Personality = classifyEHPersonality(Fn.getPersonalityFn());

-  // Do nothing if this is not a funclet-based personality.
-  if (!isFuncletEHPersonality(Personality))
+  // Do nothing if this is not a scope-based personality.
+  if (!isScopedEHPersonality(Personality))      return false;    DL = &Fn.getParent()->getDataLayout(); @@ -271,10 +280,11 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,      }      int CatchHigh = FuncInfo.getLastStateNumber();      addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers); -    DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n'); -    DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh << '\n'); -    DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh -                 << '\n'); +    LLVM_DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n'); +    LLVM_DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh +                      << '\n'); +    LLVM_DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh +                      << '\n');    } else {      auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI); @@ -285,8 +295,8 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,      int CleanupState = addUnwindMapEntry(FuncInfo, ParentState, BB);      FuncInfo.EHPadStateMap[CleanupPad] = CleanupState; -    DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB " -                 << BB->getName() << '\n'); +    LLVM_DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB " +                      << BB->getName() << '\n');      for (const BasicBlock *PredBlock : predecessors(BB)) {        if ((PredBlock = getEHPadFromPredecessor(PredBlock,                                                 CleanupPad->getParentPad()))) { @@ -351,8 +361,8 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,      // Everything in the __try block uses TryState as its parent state.      FuncInfo.EHPadStateMap[CatchSwitch] = TryState; -    DEBUG(dbgs() << "Assigning state #" << TryState << " to BB " -                 << CatchPadBB->getName() << '\n'); +    LLVM_DEBUG(dbgs() << "Assigning state #" << TryState << " to BB " +                      << CatchPadBB->getName() << '\n');      for (const BasicBlock *PredBlock : predecessors(BB))        if ((PredBlock = getEHPadFromPredecessor(PredBlock,                                                 CatchSwitch->getParentPad()))) @@ -387,8 +397,8 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,      int CleanupState = addSEHFinally(FuncInfo, ParentState, BB);      FuncInfo.EHPadStateMap[CleanupPad] = CleanupState; -    DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB " -                 << BB->getName() << '\n'); +    LLVM_DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB " +                      << BB->getName() << '\n');      for (const BasicBlock *PredBlock : predecessors(BB))        if ((PredBlock =                 getEHPadFromPredecessor(PredBlock, CleanupPad->getParentPad()))) @@ -677,13 +687,17 @@ void WinEHPrepare::colorFunclets(Function &F) {    }  } -void WinEHPrepare::demotePHIsOnFunclets(Function &F) { +void WinEHPrepare::demotePHIsOnFunclets(Function &F, +                                        bool DemoteCatchSwitchPHIOnly) {    // Strip PHI nodes off of EH pads.    
SmallVector<PHINode *, 16> PHINodes;
   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
     BasicBlock *BB = &*FI++;
     if (!BB->isEHPad())
       continue;
+    if (DemoteCatchSwitchPHIOnly && !isa<CatchSwitchInst>(BB->getFirstNonPHI()))
+      continue;
+
     for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
       Instruction *I = &*BI++;
       auto *PN = dyn_cast<PHINode>(I);
@@ -1031,20 +1045,21 @@ bool WinEHPrepare::prepareExplicitEH(Function &F) {
   cloneCommonBlocks(F);

   if (!DisableDemotion)
-    demotePHIsOnFunclets(F);
+    demotePHIsOnFunclets(F, DemoteCatchSwitchPHIOnly ||
+                                DemoteCatchSwitchPHIOnlyOpt);

   if (!DisableCleanups) {
-    DEBUG(verifyFunction(F));
+    LLVM_DEBUG(verifyFunction(F));
     removeImplausibleInstructions(F);

-    DEBUG(verifyFunction(F));
+    LLVM_DEBUG(verifyFunction(F));
     cleanupPreparedFunclets(F);
   }

-  DEBUG(verifyPreparedFunclets(F));
+  LLVM_DEBUG(verifyPreparedFunclets(F));
   // Recolor the CFG to verify that all is well.
-  DEBUG(colorFunclets(F));
-  DEBUG(verifyPreparedFunclets(F));
+  LLVM_DEBUG(colorFunclets(F));
+  LLVM_DEBUG(verifyPreparedFunclets(F));

   BlockColors.clear();
   FuncletBlocks.clear();
diff --git a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 3d83afcf1fc5..32a7457c2060 100644
--- a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -52,7 +52,6 @@ struct XRayInstrumentation : public MachineFunctionPass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
-    AU.addRequired<MachineLoopInfo>();
     AU.addPreserved<MachineLoopInfo>();
     AU.addPreserved<MachineDominatorTree>();
     MachineFunctionPass::getAnalysisUsage(AU);
@@ -160,11 +159,26 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
     for (const auto &MBB : MF)
       MICount += MBB.size();

+    // Get MachineDominatorTree or compute it on the fly if it's unavailable
+    auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+    MachineDominatorTree ComputedMDT;
+    if (!MDT) {
+      ComputedMDT.getBase().recalculate(MF);
+      MDT = &ComputedMDT;
+    }
+
+    // Get MachineLoopInfo or compute it on the fly if it's unavailable
+    auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+    MachineLoopInfo ComputedMLI;
+    if (!MLI) {
+      ComputedMLI.getBase().analyze(MDT->getBase());
+      MLI = &ComputedMLI;
+    }
+
     // Check if we have a loop.
     // FIXME: Maybe make this smarter, and see whether the loops are dependent
     // on inputs or side-effects?
-    MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
-    if (MLI.empty() && MICount < XRayThreshold)
+    if (MLI->empty() && MICount < XRayThreshold)
       return false; // Function is too small and has no loops.
   }
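(A closing note on the TargetSchedModel/TargetSubtargetInfo hunks earlier in this section: after the refactoring, init() takes only the subtarget, the latency helpers accept an MCInst directly, and reciprocal throughput comes back as a plain double, with 0.0 standing in for "unknown", instead of an Optional<double>. The sketch below shows what a caller might look like; it mirrors the getSchedInfoStr() hunks above, and the helper name and the printing are illustrative rather than part of the patch.)

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"

// Illustrative helper following the pattern of
// TargetSubtargetInfo::getSchedInfoStr in the hunks above.
static void printSchedInfo(const llvm::TargetSubtargetInfo &STI,
                           const llvm::MachineInstr &MI) {
  llvm::TargetSchedModel SchedModel;
  SchedModel.init(&STI); // was init(getSchedModel(), this, getInstrInfo())

  unsigned Latency = SchedModel.computeInstrLatency(&MI);
  double RThroughput = SchedModel.computeReciprocalThroughput(&MI);
  if (RThroughput != 0.0) // 0.0 now means "unknown", replacing Optional<double>
    llvm::errs() << "sched: [" << Latency << ":" << RThroughput << "]\n";
  else
    llvm::errs() << "sched: [" << Latency << ":?]\n";
}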
