| author | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-01-19 10:01:25 +0000 | 
| commit | d8e91e46262bc44006913e6796843909f1ac7bcd (patch) | |
| tree | 7d0c143d9b38190e0fa0180805389da22cd834c5 /lib/MCA/InstrBuilder.cpp | |
| parent | b7eb8e35e481a74962664b63dfb09483b200209a (diff) | |
Diffstat (limited to 'lib/MCA/InstrBuilder.cpp')
| -rw-r--r-- | lib/MCA/InstrBuilder.cpp | 698 | 
1 file changed, 698 insertions, 0 deletions
diff --git a/lib/MCA/InstrBuilder.cpp b/lib/MCA/InstrBuilder.cpp
new file mode 100644
index 000000000000..d2d65e55537c
--- /dev/null
+++ b/lib/MCA/InstrBuilder.cpp
@@ -0,0 +1,698 @@
+//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the InstrBuilder interface.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/InstrBuilder.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace llvm {
+namespace mca {
+
+InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
+                           const llvm::MCInstrInfo &mcii,
+                           const llvm::MCRegisterInfo &mri,
+                           const llvm::MCInstrAnalysis *mcia)
+    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
+      FirstReturnInst(true) {
+  const MCSchedModel &SM = STI.getSchedModel();
+  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
+  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
+}
+
+static void initializeUsedResources(InstrDesc &ID,
+                                    const MCSchedClassDesc &SCDesc,
+                                    const MCSubtargetInfo &STI,
+                                    ArrayRef<uint64_t> ProcResourceMasks) {
+  const MCSchedModel &SM = STI.getSchedModel();
+
+  // Populate resources consumed.
+  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
+  std::vector<ResourcePlusCycles> Worklist;
+
+  // Track cycles contributed by resources that are in a "Super" relationship.
+  // This is required if we want to correctly match the behavior of method
+  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
+  // of "consumed" processor resources and resource cycles, the logic in
+  // ExpandProcResource() doesn't update the number of resource cycles
+  // contributed by a "Super" resource to a group.
+  // We need to take this into account when we find that a processor resource is
+  // part of a group, and it is also used as the "Super" of other resources.
+  // This map stores the number of cycles contributed by sub-resources that are
+  // part of a "Super" resource. The key value is the "Super" resource mask ID.
+  DenseMap<uint64_t, unsigned> SuperResources;
+
+  unsigned NumProcResources = SM.getNumProcResourceKinds();
+  APInt Buffers(NumProcResources, 0);
+
+  bool AllInOrderResources = true;
+  bool AnyDispatchHazards = false;
+  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
+    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
+    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
+    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
+    if (PR.BufferSize < 0) {
+      AllInOrderResources = false;
+    } else {
+      Buffers.setBit(PRE->ProcResourceIdx);
+      AnyDispatchHazards |= (PR.BufferSize == 0);
+      AllInOrderResources &= (PR.BufferSize <= 1);
+    }
+
+    CycleSegment RCy(0, PRE->Cycles, false);
+    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
+    if (PR.SuperIdx) {
+      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
+      SuperResources[Super] += PRE->Cycles;
+    }
+  }
+
+  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;
+
+  // Sort elements by mask popcount, so that we prioritize resource units over
+  // resource groups, and smaller groups over larger groups.
+  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
+    unsigned popcntA = countPopulation(A.first);
+    unsigned popcntB = countPopulation(B.first);
+    if (popcntA < popcntB)
+      return true;
+    if (popcntA > popcntB)
+      return false;
+    return A.first < B.first;
+  });
+
+  uint64_t UsedResourceUnits = 0;
+
+  // Remove cycles contributed by smaller resources.
+  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
+    ResourcePlusCycles &A = Worklist[I];
+    if (!A.second.size()) {
+      A.second.NumUnits = 0;
+      A.second.setReserved();
+      ID.Resources.emplace_back(A);
+      continue;
+    }
+
+    ID.Resources.emplace_back(A);
+    uint64_t NormalizedMask = A.first;
+    if (countPopulation(A.first) == 1) {
+      UsedResourceUnits |= A.first;
+    } else {
+      // Remove the leading 1 from the resource group mask.
+      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
+    }
+
+    for (unsigned J = I + 1; J < E; ++J) {
+      ResourcePlusCycles &B = Worklist[J];
+      if ((NormalizedMask & B.first) == NormalizedMask) {
+        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
+        if (countPopulation(B.first) > 1)
+          B.second.NumUnits++;
+      }
+    }
+  }
+
+  // A SchedWrite may specify a number of cycles in which a resource group
+  // is reserved. For example (on target x86; cpu Haswell):
+  //
+  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
+  //    let ResourceCycles = [2, 2, 3];
+  //  }
+  //
+  // This means:
+  // Resource units HWPort0 and HWPort1 are both used for 2cy.
+  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
+  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
+  // will not be usable for 2 entire cycles from instruction issue.
+  //
+  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
+  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
+  // extra delay on top of the 2 cycles latency.
+  // During those extra cycles, HWPort01 is not usable by other instructions.
+  for (ResourcePlusCycles &RPC : ID.Resources) {
+    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
+      // Remove the leading 1 from the resource group mask.
+      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
+      if ((Mask & UsedResourceUnits) == Mask)
+        RPC.second.setReserved();
+    }
+  }
+
+  // Identify extra buffers that are consumed through super resources.
+  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
+    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
+      const MCProcResourceDesc &PR = *SM.getProcResource(I);
+      if (PR.BufferSize == -1)
+        continue;
+
+      uint64_t Mask = ProcResourceMasks[I];
+      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
+        Buffers.setBit(I);
+    }
+  }
+
+  // Now set the buffers.
+  if (unsigned NumBuffers = Buffers.countPopulation()) {
+    ID.Buffers.resize(NumBuffers);
+    for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) {
+      if (Buffers[I]) {
+        --NumBuffers;
+        ID.Buffers[NumBuffers] = ProcResourceMasks[I];
+      }
+    }
+  }
+
+  LLVM_DEBUG({
+    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
+      dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", "
+             << "cy=" << R.second.size() << '\n';
+    for (const uint64_t R : ID.Buffers)
+      dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n';
+  });
+}
+
+static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
+                              const MCSchedClassDesc &SCDesc,
+                              const MCSubtargetInfo &STI) {
+  if (MCDesc.isCall()) {
+    // We cannot estimate how long this call will take.
+    // Artificially set an arbitrarily high latency (100cy).
+    ID.MaxLatency = 100U;
+    return;
+  }
+
+  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
+  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
+  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
+}
+
+static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
+  // Count register definitions, and skip non register operands in the process.
+  unsigned I, E;
+  unsigned NumExplicitDefs = MCDesc.getNumDefs();
+  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
+    const MCOperand &Op = MCI.getOperand(I);
+    if (Op.isReg())
+      --NumExplicitDefs;
+  }
+
+  if (NumExplicitDefs) {
+    return make_error<InstructionError<MCInst>>(
+        "Expected more register operand definitions.", MCI);
+  }
+
+  if (MCDesc.hasOptionalDef()) {
+    // Always assume that the optional definition is the last operand.
+    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
+    if (I == MCI.getNumOperands() || !Op.isReg()) {
+      std::string Message =
+          "expected a register operand for an optional definition. Instruction "
+          "has not been correctly analyzed.";
+      return make_error<InstructionError<MCInst>>(Message, MCI);
+    }
+  }
+
+  return ErrorSuccess();
+}
+
+void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
+                                  unsigned SchedClassID) {
+  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
+  const MCSchedModel &SM = STI.getSchedModel();
+  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+
+  // Assumptions made by this algorithm:
+  //  1. The number of explicit and implicit register definitions in a MCInst
+  //     matches the number of explicit and implicit definitions according to
+  //     the opcode descriptor (MCInstrDesc).
+  //  2. Uses start at index #(MCDesc.getNumDefs()).
+  //  3. There can only be a single optional register definition, and it is
+  //     always the last operand of the sequence (excluding extra operands
+  //     contributed by variadic opcodes).
+  //
+  // These assumptions work quite well for most out-of-order in-tree targets
+  // like x86. This is mainly because the vast majority of instructions are
+  // expanded to MCInst using a straightforward lowering logic that preserves
+  // the ordering of the operands.
+  //
+  // About assumption 1.
+  // The algorithm allows non-register operands between register operand
+  // definitions. This helps to handle some special ARM instructions with
+  // implicit operand increment (-mtriple=armv7):
+  //
+  // vld1.32  {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
+  //                             @  <MCOperand Reg:59>
+  //                             @  <MCOperand Imm:0>     (!!)
+  //                             @  <MCOperand Reg:67>
+  //                             @  <MCOperand Imm:0>
+  //                             @  <MCOperand Imm:14>
+  //                             @  <MCOperand Reg:0>>
+  //
+  // MCDesc reports:
+  //  6 explicit operands.
+  //  1 optional definition
+  //  2 explicit definitions (!!)
+  //
+  // The presence of an 'Imm' operand between the two register definitions
+  // breaks the assumption that "register definitions are always at the
+  // beginning of the operand sequence".
+  //
+  // To work around this issue, this algorithm ignores (i.e. skips) any
+  // non-register operands between register definitions.  The optional
+  // definition is still at index #(NumOperands-1).
+  //
+  // According to assumption 2, register reads start at #(NumExplicitDefs-1).
+  // That means register R1 from the example is both read and written.
+  unsigned NumExplicitDefs = MCDesc.getNumDefs();
+  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
+  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
+  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
+  if (MCDesc.hasOptionalDef())
+    TotalDefs++;
+
+  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+  ID.Writes.resize(TotalDefs + NumVariadicOps);
+  // Iterate over the operands list, and skip non-register operands.
+  // The first NumExplicitDefs register operands are expected to be register
+  // definitions.
+  unsigned CurrentDef = 0;
+  unsigned i = 0;
+  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
+    const MCOperand &Op = MCI.getOperand(i);
+    if (!Op.isReg())
+      continue;
+
+    WriteDescriptor &Write = ID.Writes[CurrentDef];
+    Write.OpIndex = i;
+    if (CurrentDef < NumWriteLatencyEntries) {
+      const MCWriteLatencyEntry &WLE =
+          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
+      // Conservatively default to MaxLatency.
+      Write.Latency =
+          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
+      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
+    } else {
+      // Assign a default latency for this write.
+      Write.Latency = ID.MaxLatency;
+      Write.SClassOrWriteResourceID = 0;
+    }
+    Write.IsOptionalDef = false;
+    LLVM_DEBUG({
+      dbgs() << "\t\t[Def]    OpIdx=" << Write.OpIndex
+             << ", Latency=" << Write.Latency
+             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+    });
+    CurrentDef++;
+  }
+
+  assert(CurrentDef == NumExplicitDefs &&
+         "Expected more register operand definitions.");
+  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
+    unsigned Index = NumExplicitDefs + CurrentDef;
+    WriteDescriptor &Write = ID.Writes[Index];
+    Write.OpIndex = ~CurrentDef;
+    Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
+    if (Index < NumWriteLatencyEntries) {
+      const MCWriteLatencyEntry &WLE =
+          *STI.getWriteLatencyEntry(&SCDesc, Index);
+      // Conservatively default to MaxLatency.
+      Write.Latency =
+          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
+      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
+    } else {
+      // Assign a default latency for this write.
+      Write.Latency = ID.MaxLatency;
+      Write.SClassOrWriteResourceID = 0;
+    }
+
+    Write.IsOptionalDef = false;
+    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
+    LLVM_DEBUG({
+      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
+             << ", PhysReg=" << MRI.getName(Write.RegisterID)
+             << ", Latency=" << Write.Latency
+             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+    });
+  }
+
+  if (MCDesc.hasOptionalDef()) {
+    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
+    Write.OpIndex = MCDesc.getNumOperands() - 1;
+    // Assign a default latency for this write.
+    Write.Latency = ID.MaxLatency;
+    Write.SClassOrWriteResourceID = 0;
+    Write.IsOptionalDef = true;
+    LLVM_DEBUG({
+      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
+             << ", Latency=" << Write.Latency
+             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+    });
+  }
+
+  if (!NumVariadicOps)
+    return;
+
+  // FIXME: if an instruction opcode is flagged 'mayStore', and it has no
+  // 'unmodeledSideEffects', then this logic optimistically assumes that any
+  // extra register operands in the variadic sequence are not register
+  // definitions.
+  //
+  // Otherwise, we conservatively assume that any register operand from the
+  // variadic sequence is both a register read and a register write.
+  bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
+                        !MCDesc.hasUnmodeledSideEffects();
+  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
+  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
+    const MCOperand &Op = MCI.getOperand(OpIndex);
+    if (!Op.isReg())
+      continue;
+
+    WriteDescriptor &Write = ID.Writes[CurrentDef];
+    Write.OpIndex = OpIndex;
+    // Assign a default latency for this write.
+    Write.Latency = ID.MaxLatency;
+    Write.SClassOrWriteResourceID = 0;
+    Write.IsOptionalDef = false;
+    ++CurrentDef;
+    LLVM_DEBUG({
+      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
+             << ", Latency=" << Write.Latency
+             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
+    });
+  }
+
+  ID.Writes.resize(CurrentDef);
+}
+
+void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
+                                 unsigned SchedClassID) {
+  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
+  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
+  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
+  // Remove the optional definition.
+  if (MCDesc.hasOptionalDef())
+    --NumExplicitUses;
+  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
+  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
+  ID.Reads.resize(TotalUses);
+  unsigned CurrentUse = 0;
+  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
+       ++I, ++OpIndex) {
+    const MCOperand &Op = MCI.getOperand(OpIndex);
+    if (!Op.isReg())
+      continue;
+
+    ReadDescriptor &Read = ID.Reads[CurrentUse];
+    Read.OpIndex = OpIndex;
+    Read.UseIndex = I;
+    Read.SchedClassID = SchedClassID;
+    ++CurrentUse;
+    LLVM_DEBUG(dbgs() << "\t\t[Use]    OpIdx=" << Read.OpIndex
+                      << ", UseIndex=" << Read.UseIndex << '\n');
+  }
+
+  // For the purpose of ReadAdvance, implicit uses come directly after explicit
+  // uses. The "UseIndex" must be updated according to that implicit layout.
+  for (unsigned I = 0; I < NumImplicitUses; ++I) {
+    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
+    Read.OpIndex = ~I;
+    Read.UseIndex = NumExplicitUses + I;
+    Read.RegisterID = MCDesc.getImplicitUses()[I];
+    Read.SchedClassID = SchedClassID;
+    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
+                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
+                      << MRI.getName(Read.RegisterID) << '\n');
+  }
+
+  CurrentUse += NumImplicitUses;
+
+  // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
+  // 'unmodeledSideEffects', then this logic optimistically assumes that any
+  // extra register operands in the variadic sequence are register
+  // definitions, and not register reads.
+
+  bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
+                        !MCDesc.hasUnmodeledSideEffects();
+  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
+       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
+    const MCOperand &Op = MCI.getOperand(OpIndex);
+    if (!Op.isReg())
+      continue;
+
+    ReadDescriptor &Read = ID.Reads[CurrentUse];
+    Read.OpIndex = OpIndex;
+    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
+    Read.SchedClassID = SchedClassID;
+    ++CurrentUse;
+    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
+                      << ", UseIndex=" << Read.UseIndex << '\n');
+  }
+
+  ID.Reads.resize(CurrentUse);
+}
+
+Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
+                                    const MCInst &MCI) const {
+  if (ID.NumMicroOps != 0)
+    return ErrorSuccess();
+
+  bool UsesMemory = ID.MayLoad || ID.MayStore;
+  bool UsesBuffers = !ID.Buffers.empty();
+  bool UsesResources = !ID.Resources.empty();
+  if (!UsesMemory && !UsesBuffers && !UsesResources)
+    return ErrorSuccess();
+
+  StringRef Message;
+  if (UsesMemory) {
+    Message = "found an inconsistent instruction that decodes "
+              "into zero opcodes and that consumes load/store "
+              "unit resources.";
+  } else {
+    Message = "found an inconsistent instruction that decodes "
+              "to zero opcodes and that consumes scheduler "
+              "resources.";
+  }
+
+  return make_error<InstructionError<MCInst>>(Message, MCI);
+}
+
+Expected<const InstrDesc &>
+InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
+  assert(STI.getSchedModel().hasInstrSchedModel() &&
+         "Itineraries are not yet supported!");
+
+  // Obtain the instruction descriptor from the opcode.
+  unsigned short Opcode = MCI.getOpcode();
+  const MCInstrDesc &MCDesc = MCII.get(Opcode);
+  const MCSchedModel &SM = STI.getSchedModel();
+
+  // Then obtain the scheduling class information from the instruction.
+  unsigned SchedClassID = MCDesc.getSchedClass();
+  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
+
+  // Try to solve variant scheduling classes.
+  if (IsVariant) {
+    unsigned CPUID = SM.getProcessorID();
+    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
+      SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);
+
+    if (!SchedClassID) {
+      return make_error<InstructionError<MCInst>>(
+          "unable to resolve scheduling class for write variant.", MCI);
+    }
+  }
+
+  // Check if this instruction is supported. Otherwise, report an error.
+  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
+  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
+    return make_error<InstructionError<MCInst>>(
+        "found an unsupported instruction in the input assembly sequence.",
+        MCI);
+  }
+
+  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
+  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
+
+  // Create a new empty descriptor.
+  std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
+  ID->NumMicroOps = SCDesc.NumMicroOps;
+
+  if (MCDesc.isCall() && FirstCallInst) {
+    // We don't correctly model calls.
+    WithColor::warning() << "found a call in the input assembly sequence.\n";
+    WithColor::note() << "call instructions are not correctly modeled. "
" +                      << "Assume a latency of 100cy.\n"; +    FirstCallInst = false; +  } + +  if (MCDesc.isReturn() && FirstReturnInst) { +    WithColor::warning() << "found a return instruction in the input" +                         << " assembly sequence.\n"; +    WithColor::note() << "program counter updates are ignored.\n"; +    FirstReturnInst = false; +  } + +  ID->MayLoad = MCDesc.mayLoad(); +  ID->MayStore = MCDesc.mayStore(); +  ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); +  ID->BeginGroup = SCDesc.BeginGroup; +  ID->EndGroup = SCDesc.EndGroup; + +  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); +  computeMaxLatency(*ID, MCDesc, SCDesc, STI); + +  if (Error Err = verifyOperands(MCDesc, MCI)) +    return std::move(Err); + +  populateWrites(*ID, MCI, SchedClassID); +  populateReads(*ID, MCI, SchedClassID); + +  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); +  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); + +  // Sanity check on the instruction descriptor. +  if (Error Err = verifyInstrDesc(*ID, MCI)) +    return std::move(Err); + +  // Now add the new descriptor. +  SchedClassID = MCDesc.getSchedClass(); +  bool IsVariadic = MCDesc.isVariadic(); +  if (!IsVariadic && !IsVariant) { +    Descriptors[MCI.getOpcode()] = std::move(ID); +    return *Descriptors[MCI.getOpcode()]; +  } + +  VariantDescriptors[&MCI] = std::move(ID); +  return *VariantDescriptors[&MCI]; +} + +Expected<const InstrDesc &> +InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { +  if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) +    return *Descriptors[MCI.getOpcode()]; + +  if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) +    return *VariantDescriptors[&MCI]; + +  return createInstrDescImpl(MCI); +} + +Expected<std::unique_ptr<Instruction>> +InstrBuilder::createInstruction(const MCInst &MCI) { +  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI); +  if (!DescOrErr) +    return DescOrErr.takeError(); +  const InstrDesc &D = *DescOrErr; +  std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D); + +  // Check if this is a dependency breaking instruction. +  APInt Mask; + +  bool IsZeroIdiom = false; +  bool IsDepBreaking = false; +  if (MCIA) { +    unsigned ProcID = STI.getSchedModel().getProcessorID(); +    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID); +    IsDepBreaking = +        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID); +    if (MCIA->isOptimizableRegisterMove(MCI, ProcID)) +      NewIS->setOptimizableMove(); +  } + +  // Initialize Reads first. +  for (const ReadDescriptor &RD : D.Reads) { +    int RegID = -1; +    if (!RD.isImplicitRead()) { +      // explicit read. +      const MCOperand &Op = MCI.getOperand(RD.OpIndex); +      // Skip non-register operands. +      if (!Op.isReg()) +        continue; +      RegID = Op.getReg(); +    } else { +      // Implicit read. +      RegID = RD.RegisterID; +    } + +    // Skip invalid register operands. +    if (!RegID) +      continue; + +    // Okay, this is a register operand. Create a ReadState for it. +    assert(RegID > 0 && "Invalid register ID found!"); +    NewIS->getUses().emplace_back(RD, RegID); +    ReadState &RS = NewIS->getUses().back(); + +    if (IsDepBreaking) { +      // A mask of all zeroes means: explicit input operands are not +      // independent. 
+      if (Mask.isNullValue()) {
+        if (!RD.isImplicitRead())
+          RS.setIndependentFromDef();
+      } else {
+        // Check if this register operand is independent according to `Mask`.
+        // Note that Mask may not have enough bits to describe all explicit and
+        // implicit input operands. If this register operand doesn't have a
+        // corresponding bit in Mask, then conservatively assume that it is
+        // dependent.
+        if (Mask.getBitWidth() > RD.UseIndex) {
+          // Okay. This mask describes register use `RD.UseIndex`.
+          if (Mask[RD.UseIndex])
+            RS.setIndependentFromDef();
+        }
+      }
+    }
+  }
+
+  // Early exit if there are no writes.
+  if (D.Writes.empty())
+    return std::move(NewIS);
+
+  // Track register writes that implicitly clear the upper portion of the
+  // underlying super-registers using an APInt.
+  APInt WriteMask(D.Writes.size(), 0);
+
+  // Now query the MCInstrAnalysis object to obtain information about which
+  // register writes implicitly clear the upper portion of a super-register.
+  if (MCIA)
+    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);
+
+  // Initialize writes.
+  unsigned WriteIndex = 0;
+  for (const WriteDescriptor &WD : D.Writes) {
+    unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID
+                                          : MCI.getOperand(WD.OpIndex).getReg();
+    // Check if this is an optional definition that references NoReg.
+    if (WD.IsOptionalDef && !RegID) {
+      ++WriteIndex;
+      continue;
+    }
+
+    assert(RegID && "Expected a valid register ID!");
+    NewIS->getDefs().emplace_back(WD, RegID,
+                                  /* ClearsSuperRegs */ WriteMask[WriteIndex],
+                                  /* WritesZero */ IsZeroIdiom);
+    ++WriteIndex;
+  }
+
+  return std::move(NewIS);
+}
+} // namespace mca
+} // namespace llvm
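
A few standalone sketches below illustrate the bookkeeping that `InstrBuilder` performs; they use plain C++ with hypothetical values rather than the LLVM MC types.

The resource bookkeeping in `initializeUsedResources` (sort masks by popcount, then subtract cycles already charged to contained resources from the groups that include them) can be mirrored with made-up masks and cycle counts. This sketch ignores the "Super" resource correction and clamps the subtraction, so it is a simplification of the worklist loop, not the real `CycleSegment` arithmetic:

```cpp
#include <algorithm>
#include <bit>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Hypothetical masks: two units (single bits) and one group that owns both
  // units plus a leading "group" bit.
  constexpr uint64_t UnitA = 0b001;
  constexpr uint64_t UnitB = 0b010;
  constexpr uint64_t GroupAB = 0b100 | UnitA | UnitB;

  // (mask, cycles) pairs as they would come out of a scheduling class.
  std::vector<std::pair<uint64_t, unsigned>> Worklist = {
      {GroupAB, 3}, {UnitA, 1}, {UnitB, 1}};

  // Sort by popcount: units before groups, smaller groups before larger ones.
  std::sort(Worklist.begin(), Worklist.end(),
            [](const auto &A, const auto &B) {
              unsigned PA = std::popcount(A.first);
              unsigned PB = std::popcount(B.first);
              return PA != PB ? PA < PB : A.first < B.first;
            });

  for (std::size_t I = 0; I < Worklist.size(); ++I) {
    auto &A = Worklist[I];
    uint64_t Normalized = A.first;
    if (std::popcount(A.first) > 1)
      Normalized ^= std::bit_floor(Normalized); // drop the leading group bit

    // Cycles already charged to a contained resource are removed from every
    // later entry (i.e. every group) whose mask includes it.
    for (std::size_t J = I + 1; J < Worklist.size(); ++J) {
      auto &B = Worklist[J];
      if ((Normalized & B.first) == Normalized)
        B.second -= std::min(B.second, A.second); // clamped here for simplicity
    }
  }

  for (const auto &[Mask, Cycles] : Worklist)
    std::printf("mask=0x%llx cycles=%u\n",
                static_cast<unsigned long long>(Mask), Cycles);
}
```

Units are single bits; a group mask carries one extra leading bit, which is why that bit is dropped with `std::bit_floor` before the containment test, matching the `PowerOf2Floor` normalization above.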
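The operand-layout assumptions that `populateWrites` relies on (explicit definitions first, then implicit definitions, then an optional definition at the last declared operand, then any variadic operands) can be sketched with hypothetical counts. `WriteDesc` below is a stand-in, not the real `WriteDescriptor`:

```cpp
#include <cstdio>
#include <vector>

struct WriteDesc {
  int OpIndex;        // operand index; ~N encodes the N-th implicit def
  bool IsOptionalDef;
};

int main() {
  // Hypothetical opcode: 1 explicit def, 2 implicit defs, an optional def,
  // and 2 extra variadic register operands present in the MCInst.
  unsigned NumExplicitDefs = 1, NumImplicitDefs = 2, NumVariadicOps = 2;
  bool HasOptionalDef = true;
  unsigned NumDeclaredOperands = 5; // operands described by the opcode descriptor

  std::vector<WriteDesc> Writes;
  // 1) Explicit defs sit at the front of the operand list.
  for (unsigned I = 0; I < NumExplicitDefs; ++I)
    Writes.push_back({static_cast<int>(I), false});
  // 2) Implicit defs have no MCInst operand; OpIndex = ~N marks them.
  for (unsigned I = 0; I < NumImplicitDefs; ++I)
    Writes.push_back({~static_cast<int>(I), false});
  // 3) The optional def, if any, is assumed to be the last declared operand.
  if (HasOptionalDef)
    Writes.push_back({static_cast<int>(NumDeclaredOperands - 1), true});
  // 4) Variadic operands follow the declared operands and are conservatively
  //    treated as writes unless the opcode is a plain store.
  for (unsigned I = 0; I < NumVariadicOps; ++I)
    Writes.push_back({static_cast<int>(NumDeclaredOperands + I), false});

  for (const WriteDesc &W : Writes)
    std::printf("OpIndex=%d optional=%d\n", W.OpIndex, W.IsOptionalDef);
}
```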
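Resolving a variant scheduling class in `createInstrDescImpl` is essentially a loop around the subtarget's resolver, with class ID 0 meaning failure. A minimal sketch with a hypothetical two-entry table standing in for the MC query:

```cpp
#include <cstdio>
#include <map>

struct SchedClass { bool IsVariant; };

// Hypothetical tables: class 5 is a variant that resolves to class 9.
std::map<unsigned, SchedClass> Classes = {{5, {true}}, {9, {false}}};
unsigned resolveOnce(unsigned ID) { return ID == 5 ? 9 : 0; }

int main() {
  unsigned SchedClassID = 5;
  // Keep resolving while the class is still a variant; ID 0 means failure.
  while (SchedClassID && Classes[SchedClassID].IsVariant)
    SchedClassID = resolveOnce(SchedClassID);
  if (!SchedClassID)
    std::printf("unable to resolve scheduling class for write variant\n");
  else
    std::printf("resolved to %u\n", SchedClassID);
}
```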
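The caching policy at the end of `createInstrDescImpl` and in `getOrCreateInstrDesc` keys plain opcodes by opcode value, while variant or variadic opcodes are keyed by the `MCInst` address, since their descriptor depends on the concrete instruction. A sketch of that two-level cache, with hypothetical `Desc` and `Inst` types in place of the LLVM classes:

```cpp
#include <cstdio>
#include <map>
#include <memory>
#include <utility>

struct Desc { unsigned NumMicroOps; };
struct Inst { unsigned Opcode; bool VariantOrVariadic; };

std::map<unsigned, std::unique_ptr<Desc>> Descriptors;       // keyed by opcode
std::map<const Inst *, std::unique_ptr<Desc>> VariantDescs;  // keyed by instance

const Desc &getOrCreate(const Inst &I) {
  if (auto It = Descriptors.find(I.Opcode); It != Descriptors.end())
    return *It->second;
  if (auto It = VariantDescs.find(&I); It != VariantDescs.end())
    return *It->second;

  auto D = std::make_unique<Desc>(Desc{1}); // stand-in for building a descriptor
  // Descriptors built for variant or variadic opcodes depend on the concrete
  // instruction, so they are cached per instance rather than per opcode.
  if (I.VariantOrVariadic)
    return *(VariantDescs[&I] = std::move(D));
  return *(Descriptors[I.Opcode] = std::move(D));
}

int main() {
  Inst A{42, false}, B{42, false}, C{7, true};
  const Desc &D1 = getOrCreate(A);
  const Desc &D2 = getOrCreate(B);    // same opcode: cache hit
  std::printf("shared=%d\n", &D1 == &D2);
  getOrCreate(C);                     // cached per instruction address
}
```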
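Finally, the dependency-breaking logic in `createInstruction` interprets the analysis mask once per read. A small sketch of that decision, using a plain `uint64_t` with hypothetical values instead of an `APInt`:

```cpp
#include <cstdint>
#include <cstdio>

// Returns true if the read at `UseIndex` should be marked independent from
// its definition.
bool isIndependent(uint64_t Mask, unsigned MaskBits, bool IsImplicitRead,
                   unsigned UseIndex) {
  if (Mask == 0)                 // all-zero mask: every *explicit* read is
    return !IsImplicitRead;      // independent, implicit reads are not
  if (UseIndex >= MaskBits)      // no bit for this operand: conservatively
    return false;                // treat it as dependent
  return (Mask >> UseIndex) & 1; // otherwise the bit decides
}

int main() {
  // Hypothetical 4-bit mask 0b0101: uses #0 and #2 are dependency breaking.
  std::printf("%d %d %d %d\n",
              isIndependent(0b0101, 4, /*IsImplicitRead=*/false, 0),
              isIndependent(0b0101, 4, /*IsImplicitRead=*/false, 1),
              isIndependent(0b0101, 4, /*IsImplicitRead=*/false, 5),
              isIndependent(0, 4, /*IsImplicitRead=*/true, 0));
}
```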
