diff options
Diffstat (limited to 'lib/MCA')
-rw-r--r-- | lib/MCA/CodeEmitter.cpp | 37 | ||||
-rw-r--r-- | lib/MCA/Context.cpp | 23 | ||||
-rw-r--r-- | lib/MCA/HardwareUnits/LSUnit.cpp | 28 | ||||
-rw-r--r-- | lib/MCA/HardwareUnits/RegisterFile.cpp | 16 | ||||
-rw-r--r-- | lib/MCA/HardwareUnits/ResourceManager.cpp | 59 | ||||
-rw-r--r-- | lib/MCA/HardwareUnits/RetireControlUnit.cpp | 65 | ||||
-rw-r--r-- | lib/MCA/HardwareUnits/Scheduler.cpp | 12 | ||||
-rw-r--r-- | lib/MCA/InstrBuilder.cpp | 44 | ||||
-rw-r--r-- | lib/MCA/Instruction.cpp | 4 | ||||
-rw-r--r-- | lib/MCA/Stages/DispatchStage.cpp | 19 | ||||
-rw-r--r-- | lib/MCA/Stages/EntryStage.cpp | 2 | ||||
-rw-r--r-- | lib/MCA/Stages/ExecuteStage.cpp | 22 | ||||
-rw-r--r-- | lib/MCA/Stages/RetireStage.cpp | 8 |
13 files changed, 202 insertions, 137 deletions
diff --git a/lib/MCA/CodeEmitter.cpp b/lib/MCA/CodeEmitter.cpp new file mode 100644 index 000000000000..294107219cb0 --- /dev/null +++ b/lib/MCA/CodeEmitter.cpp @@ -0,0 +1,37 @@ +//===--------------------- CodeEmitter.cpp ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the CodeEmitter API. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/CodeEmitter.h" + +namespace llvm { +namespace mca { + +CodeEmitter::EncodingInfo +CodeEmitter::getOrCreateEncodingInfo(unsigned MCID) { + EncodingInfo &EI = Encodings[MCID]; + if (EI.second) + return EI; + + SmallVector<llvm::MCFixup, 2> Fixups; + const MCInst &Inst = Sequence[MCID]; + MCInst Relaxed(Sequence[MCID]); + if (MAB.mayNeedRelaxation(Inst, STI)) + MAB.relaxInstruction(Inst, STI, Relaxed); + + EI.first = Code.size(); + MCE.encodeInstruction(Relaxed, VecOS, Fixups, STI); + EI.second = Code.size() - EI.first; + return EI; +} + +} // namespace mca +} // namespace llvm diff --git a/lib/MCA/Context.cpp b/lib/MCA/Context.cpp index f0e8dfab8680..0160e1f9f787 100644 --- a/lib/MCA/Context.cpp +++ b/lib/MCA/Context.cpp @@ -28,24 +28,23 @@ namespace llvm { namespace mca { std::unique_ptr<Pipeline> -Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, - SourceMgr &SrcMgr) { +Context::createDefaultPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) { const MCSchedModel &SM = STI.getSchedModel(); // Create the hardware units defining the backend. - auto RCU = llvm::make_unique<RetireControlUnit>(SM); - auto PRF = llvm::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize); - auto LSU = llvm::make_unique<LSUnit>(SM, Opts.LoadQueueSize, + auto RCU = std::make_unique<RetireControlUnit>(SM); + auto PRF = std::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize); + auto LSU = std::make_unique<LSUnit>(SM, Opts.LoadQueueSize, Opts.StoreQueueSize, Opts.AssumeNoAlias); - auto HWS = llvm::make_unique<Scheduler>(SM, *LSU); + auto HWS = std::make_unique<Scheduler>(SM, *LSU); // Create the pipeline stages. - auto Fetch = llvm::make_unique<EntryStage>(SrcMgr); - auto Dispatch = llvm::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth, + auto Fetch = std::make_unique<EntryStage>(SrcMgr); + auto Dispatch = std::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth, *RCU, *PRF); auto Execute = - llvm::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis); - auto Retire = llvm::make_unique<RetireStage>(*RCU, *PRF); + std::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis); + auto Retire = std::make_unique<RetireStage>(*RCU, *PRF, *LSU); // Pass the ownership of all the hardware units to this Context. addHardwareUnit(std::move(RCU)); @@ -54,10 +53,10 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, addHardwareUnit(std::move(HWS)); // Build the pipeline. - auto StagePipeline = llvm::make_unique<Pipeline>(); + auto StagePipeline = std::make_unique<Pipeline>(); StagePipeline->appendStage(std::move(Fetch)); if (Opts.MicroOpQueueSize) - StagePipeline->appendStage(llvm::make_unique<MicroOpQueueStage>( + StagePipeline->appendStage(std::make_unique<MicroOpQueueStage>( Opts.MicroOpQueueSize, Opts.DecodersThroughput)); StagePipeline->appendStage(std::move(Dispatch)); StagePipeline->appendStage(std::move(Execute)); diff --git a/lib/MCA/HardwareUnits/LSUnit.cpp b/lib/MCA/HardwareUnits/LSUnit.cpp index ac1a6a36547b..0ee084c7ce1a 100644 --- a/lib/MCA/HardwareUnits/LSUnit.cpp +++ b/lib/MCA/HardwareUnits/LSUnit.cpp @@ -29,12 +29,12 @@ LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ, const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); if (!LQSize && EPI.LoadQueueID) { const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID); - LQSize = LdQDesc.BufferSize; + LQSize = std::max(0, LdQDesc.BufferSize); } if (!SQSize && EPI.StoreQueueID) { const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID); - SQSize = StQDesc.BufferSize; + SQSize = std::max(0, StQDesc.BufferSize); } } } @@ -72,9 +72,9 @@ unsigned LSUnit::dispatch(const InstRef &IR) { assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); if (Desc.MayLoad) - assignLQSlot(); + acquireLQSlot(); if (Desc.MayStore) - assignSQSlot(); + acquireSQSlot(); if (Desc.MayStore) { // Always create a new group for store operations. @@ -160,26 +160,28 @@ LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { } void LSUnitBase::onInstructionExecuted(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - bool IsALoad = Desc.MayLoad; - bool IsAStore = Desc.MayStore; - assert((IsALoad || IsAStore) && "Expected a memory operation!"); - unsigned GroupID = IR.getInstruction()->getLSUTokenID(); auto It = Groups.find(GroupID); + assert(It != Groups.end() && "Instruction not dispatched to the LS unit"); It->second->onInstructionExecuted(); - if (It->second->isExecuted()) { + if (It->second->isExecuted()) Groups.erase(It); - } +} + +void LSUnitBase::onInstructionRetired(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + bool IsALoad = Desc.MayLoad; + bool IsAStore = Desc.MayStore; + assert((IsALoad || IsAStore) && "Expected a memory operation!"); if (IsALoad) { - UsedLQEntries--; + releaseLQSlot(); LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex() << " has been removed from the load queue.\n"); } if (IsAStore) { - UsedSQEntries--; + releaseSQSlot(); LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex() << " has been removed from the store queue.\n"); } diff --git a/lib/MCA/HardwareUnits/RegisterFile.cpp b/lib/MCA/HardwareUnits/RegisterFile.cpp index 86a888ea8cae..7ea5506f11d6 100644 --- a/lib/MCA/HardwareUnits/RegisterFile.cpp +++ b/lib/MCA/HardwareUnits/RegisterFile.cpp @@ -147,7 +147,7 @@ void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, void RegisterFile::addRegisterWrite(WriteRef Write, MutableArrayRef<unsigned> UsedPhysRegs) { WriteState &WS = *Write.getWriteState(); - unsigned RegID = WS.getRegisterID(); + MCPhysReg RegID = WS.getRegisterID(); assert(RegID && "Adding an invalid register definition?"); LLVM_DEBUG({ @@ -194,7 +194,7 @@ void RegisterFile::addRegisterWrite(WriteRef Write, } // Update zero registers. - unsigned ZeroRegisterID = + MCPhysReg ZeroRegisterID = WS.clearsSuperRegisters() ? RegID : WS.getRegisterID(); if (IsWriteZero) { ZeroRegisters.setBit(ZeroRegisterID); @@ -247,7 +247,7 @@ void RegisterFile::removeRegisterWrite( if (WS.isEliminated()) return; - unsigned RegID = WS.getRegisterID(); + MCPhysReg RegID = WS.getRegisterID(); assert(RegID != 0 && "Invalidating an already invalid register?"); assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && @@ -255,7 +255,7 @@ void RegisterFile::removeRegisterWrite( assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); bool ShouldFreePhysRegs = !WS.isWriteZero(); - unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; + MCPhysReg RenameAs = RegisterMappings[RegID].second.RenameAs; if (RenameAs && RenameAs != RegID) { RegID = RenameAs; @@ -355,7 +355,7 @@ bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) { void RegisterFile::collectWrites(const ReadState &RS, SmallVectorImpl<WriteRef> &Writes) const { - unsigned RegID = RS.getRegisterID(); + MCPhysReg RegID = RS.getRegisterID(); assert(RegID && RegID < RegisterMappings.size()); LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " << MRI.getName(RegID) << '\n'); @@ -397,7 +397,7 @@ void RegisterFile::collectWrites(const ReadState &RS, void RegisterFile::addRegisterRead(ReadState &RS, const MCSubtargetInfo &STI) const { - unsigned RegID = RS.getRegisterID(); + MCPhysReg RegID = RS.getRegisterID(); const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; RS.setPRF(RRI.IndexPlusCost.first); if (RS.isIndependentFromDef()) @@ -424,11 +424,11 @@ void RegisterFile::addRegisterRead(ReadState &RS, } } -unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const { +unsigned RegisterFile::isAvailable(ArrayRef<MCPhysReg> Regs) const { SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles()); // Find how many new mappings must be created for each register file. - for (const unsigned RegID : Regs) { + for (const MCPhysReg RegID : Regs) { const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; if (Entry.first) diff --git a/lib/MCA/HardwareUnits/ResourceManager.cpp b/lib/MCA/HardwareUnits/ResourceManager.cpp index 06f2476353d6..088aea3e23c6 100644 --- a/lib/MCA/HardwareUnits/ResourceManager.cpp +++ b/lib/MCA/HardwareUnits/ResourceManager.cpp @@ -104,7 +104,7 @@ void ResourceState::dump() const { static std::unique_ptr<ResourceStrategy> getStrategyFor(const ResourceState &RS) { if (RS.isAResourceGroup() || RS.getNumUnits() > 1) - return llvm::make_unique<DefaultResourceStrategy>(RS.getReadyMask()); + return std::make_unique<DefaultResourceStrategy>(RS.getReadyMask()); return std::unique_ptr<ResourceStrategy>(nullptr); } @@ -114,7 +114,8 @@ ResourceManager::ResourceManager(const MCSchedModel &SM) Resource2Groups(SM.getNumProcResourceKinds() - 1, 0), ProcResID2Mask(SM.getNumProcResourceKinds(), 0), ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0), - ProcResUnitMask(0), ReservedResourceGroups(0) { + ProcResUnitMask(0), ReservedResourceGroups(0), + AvailableBuffers(~0ULL), ReservedBuffers(0) { computeProcResourceMasks(SM, ProcResID2Mask); // initialize vector ResIndex2ProcResID. @@ -127,7 +128,7 @@ ResourceManager::ResourceManager(const MCSchedModel &SM) uint64_t Mask = ProcResID2Mask[I]; unsigned Index = getResourceStateIndex(Mask); Resources[Index] = - llvm::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask); + std::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask); Strategies[Index] = getStrategyFor(*Resources[Index]); } @@ -241,33 +242,41 @@ void ResourceManager::release(const ResourceRef &RR) { } ResourceStateEvent -ResourceManager::canBeDispatched(ArrayRef<uint64_t> Buffers) const { - ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; - for (uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; - Result = RS.isBufferAvailable(); - if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) - break; - } - return Result; +ResourceManager::canBeDispatched(uint64_t ConsumedBuffers) const { + if (ConsumedBuffers & ReservedBuffers) + return ResourceStateEvent::RS_RESERVED; + if (ConsumedBuffers & (~AvailableBuffers)) + return ResourceStateEvent::RS_BUFFER_UNAVAILABLE; + return ResourceStateEvent::RS_BUFFER_AVAILABLE; } -void ResourceManager::reserveBuffers(ArrayRef<uint64_t> Buffers) { - for (const uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; +void ResourceManager::reserveBuffers(uint64_t ConsumedBuffers) { + while (ConsumedBuffers) { + uint64_t CurrentBuffer = ConsumedBuffers & (-ConsumedBuffers); + ResourceState &RS = *Resources[getResourceStateIndex(CurrentBuffer)]; + ConsumedBuffers ^= CurrentBuffer; assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); - RS.reserveBuffer(); - + if (!RS.reserveBuffer()) + AvailableBuffers ^= CurrentBuffer; if (RS.isADispatchHazard()) { - assert(!RS.isReserved()); - RS.setReserved(); + // Reserve this buffer now, and release it once pipeline resources + // consumed by the instruction become available again. + // We do this to simulate an in-order dispatch/issue of instructions. + ReservedBuffers ^= CurrentBuffer; } } } -void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) { - for (const uint64_t R : Buffers) - Resources[getResourceStateIndex(R)]->releaseBuffer(); +void ResourceManager::releaseBuffers(uint64_t ConsumedBuffers) { + AvailableBuffers |= ConsumedBuffers; + while (ConsumedBuffers) { + uint64_t CurrentBuffer = ConsumedBuffers & (-ConsumedBuffers); + ResourceState &RS = *Resources[getResourceStateIndex(CurrentBuffer)]; + ConsumedBuffers ^= CurrentBuffer; + RS.releaseBuffer(); + // Do not unreserve dispatch hazard resource buffers. Wait until all + // pipeline resources have been freed too. + } } uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const { @@ -322,7 +331,6 @@ void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) { if (countPopulation(RR.first) == 1) release(RR); - releaseResource(RR.first); ResourcesFreed.push_back(RR); } @@ -336,7 +344,7 @@ void ResourceManager::reserveResource(uint64_t ResourceID) { const unsigned Index = getResourceStateIndex(ResourceID); ResourceState &Resource = *Resources[Index]; assert(Resource.isAResourceGroup() && !Resource.isReserved() && - "Unexpected resource found!"); + "Unexpected resource state found!"); Resource.setReserved(); ReservedResourceGroups ^= 1ULL << Index; } @@ -347,6 +355,9 @@ void ResourceManager::releaseResource(uint64_t ResourceID) { Resource.clearReserved(); if (Resource.isAResourceGroup()) ReservedResourceGroups ^= 1ULL << Index; + // Now it is safe to release dispatch/issue resources. + if (Resource.isADispatchHazard()) + ReservedBuffers ^= 1ULL << Index; } } // namespace mca diff --git a/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/lib/MCA/HardwareUnits/RetireControlUnit.cpp index 068c5062ccdf..de519d7fd94a 100644 --- a/lib/MCA/HardwareUnits/RetireControlUnit.cpp +++ b/lib/MCA/HardwareUnits/RetireControlUnit.cpp @@ -21,65 +21,78 @@ namespace mca { RetireControlUnit::RetireControlUnit(const MCSchedModel &SM) : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), - AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { + NumROBEntries(SM.MicroOpBufferSize), + AvailableEntries(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { // Check if the scheduling model provides extra information about the machine // processor. If so, then use that information to set the reorder buffer size // and the maximum number of instructions retired per cycle. if (SM.hasExtraProcessorInfo()) { const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); if (EPI.ReorderBufferSize) - AvailableSlots = EPI.ReorderBufferSize; + AvailableEntries = EPI.ReorderBufferSize; MaxRetirePerCycle = EPI.MaxRetirePerCycle; } - - assert(AvailableSlots && "Invalid reorder buffer size!"); - Queue.resize(AvailableSlots); + NumROBEntries = AvailableEntries; + assert(NumROBEntries && "Invalid reorder buffer size!"); + Queue.resize(2 * NumROBEntries); } // Reserves a number of slots, and returns a new token. -unsigned RetireControlUnit::reserveSlot(const InstRef &IR, - unsigned NumMicroOps) { - assert(isAvailable(NumMicroOps) && "Reorder Buffer unavailable!"); - unsigned NormalizedQuantity = - std::min(NumMicroOps, static_cast<unsigned>(Queue.size())); - // Zero latency instructions may have zero uOps. Artificially bump this - // value to 1. Although zero latency instructions don't consume scheduler - // resources, they still consume one slot in the retire queue. - NormalizedQuantity = std::max(NormalizedQuantity, 1U); +unsigned RetireControlUnit::dispatch(const InstRef &IR) { + const Instruction &Inst = *IR.getInstruction(); + unsigned Entries = normalizeQuantity(Inst.getNumMicroOps()); + assert((AvailableEntries >= Entries) && "Reorder Buffer unavailable!"); + unsigned TokenID = NextAvailableSlotIdx; - Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; - NextAvailableSlotIdx += NormalizedQuantity; + Queue[NextAvailableSlotIdx] = {IR, Entries, false}; + NextAvailableSlotIdx += std::max(1U, Entries); NextAvailableSlotIdx %= Queue.size(); - AvailableSlots -= NormalizedQuantity; + + AvailableEntries -= Entries; return TokenID; } -const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { - return Queue[CurrentInstructionSlotIdx]; +const RetireControlUnit::RUToken &RetireControlUnit::getCurrentToken() const { + const RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; +#ifndef NDEBUG + const Instruction *Inst = Current.IR.getInstruction(); + assert(Inst && "Invalid RUToken in the RCU queue."); +#endif + return Current; +} + +unsigned RetireControlUnit::computeNextSlotIdx() const { + const RetireControlUnit::RUToken &Current = getCurrentToken(); + unsigned NextSlotIdx = CurrentInstructionSlotIdx + std::max(1U, Current.NumSlots); + return NextSlotIdx % Queue.size(); +} + +const RetireControlUnit::RUToken &RetireControlUnit::peekNextToken() const { + return Queue[computeNextSlotIdx()]; } void RetireControlUnit::consumeCurrentToken() { RetireControlUnit::RUToken &Current = Queue[CurrentInstructionSlotIdx]; - assert(Current.NumSlots && "Reserved zero slots?"); - assert(Current.IR && "Invalid RUToken in the RCU queue."); Current.IR.getInstruction()->retire(); // Update the slot index to be the next item in the circular queue. - CurrentInstructionSlotIdx += Current.NumSlots; + CurrentInstructionSlotIdx += std::max(1U, Current.NumSlots); CurrentInstructionSlotIdx %= Queue.size(); - AvailableSlots += Current.NumSlots; + AvailableEntries += Current.NumSlots; + Current = { InstRef(), 0U, false }; } void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { assert(Queue.size() > TokenID); - assert(Queue[TokenID].Executed == false && Queue[TokenID].IR); + assert(Queue[TokenID].IR.getInstruction() && "Instruction was not dispatched!"); + assert(Queue[TokenID].Executed == false && "Instruction already executed!"); Queue[TokenID].Executed = true; } #ifndef NDEBUG void RetireControlUnit::dump() const { - dbgs() << "Retire Unit: { Total Slots=" << Queue.size() - << ", Available Slots=" << AvailableSlots << " }\n"; + dbgs() << "Retire Unit: { Total ROB Entries =" << NumROBEntries + << ", Available ROB entries=" << AvailableEntries << " }\n"; } #endif diff --git a/lib/MCA/HardwareUnits/Scheduler.cpp b/lib/MCA/HardwareUnits/Scheduler.cpp index 0f0f2ffb8325..8730336c6669 100644 --- a/lib/MCA/HardwareUnits/Scheduler.cpp +++ b/lib/MCA/HardwareUnits/Scheduler.cpp @@ -21,7 +21,7 @@ namespace mca { void Scheduler::initializeStrategy(std::unique_ptr<SchedulerStrategy> S) { // Ensure we have a valid (non-null) strategy object. - Strategy = S ? std::move(S) : llvm::make_unique<DefaultSchedulerStrategy>(); + Strategy = S ? std::move(S) : std::make_unique<DefaultSchedulerStrategy>(); } // Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. @@ -38,9 +38,8 @@ void Scheduler::dump() const { #endif Scheduler::Status Scheduler::isAvailable(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - - ResourceStateEvent RSE = Resources->canBeDispatched(Desc.Buffers); + ResourceStateEvent RSE = + Resources->canBeDispatched(IR.getInstruction()->getUsedBuffers()); HadTokenStall = RSE != RS_BUFFER_AVAILABLE; switch (RSE) { @@ -106,7 +105,7 @@ void Scheduler::issueInstruction( bool HasDependentUsers = Inst.hasDependentUsers(); HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR); - Resources->releaseBuffers(Inst.getDesc().Buffers); + Resources->releaseBuffers(Inst.getUsedBuffers()); issueInstructionImpl(IR, UsedResources); // Instructions that have been issued during this cycle might have unblocked // other dependent instructions. Dependent instructions may be issued during @@ -300,8 +299,7 @@ bool Scheduler::mustIssueImmediately(const InstRef &IR) const { bool Scheduler::dispatch(InstRef &IR) { Instruction &IS = *IR.getInstruction(); - const InstrDesc &Desc = IS.getDesc(); - Resources->reserveBuffers(Desc.Buffers); + Resources->reserveBuffers(IS.getUsedBuffers()); // If necessary, reserve queue entries in the load-store unit (LSU). if (IS.isMemOp()) diff --git a/lib/MCA/InstrBuilder.cpp b/lib/MCA/InstrBuilder.cpp index 829920366c90..bd28c733535c 100644 --- a/lib/MCA/InstrBuilder.cpp +++ b/lib/MCA/InstrBuilder.cpp @@ -80,7 +80,7 @@ static void initializeUsedResources(InstrDesc &ID, if (PR.BufferSize < 0) { AllInOrderResources = false; } else { - Buffers.setBit(PRE->ProcResourceIdx); + Buffers.setBit(getResourceStateIndex(Mask)); AnyDispatchHazards |= (PR.BufferSize == 0); AllInOrderResources &= (PR.BufferSize <= 1); } @@ -139,9 +139,6 @@ static void initializeUsedResources(InstrDesc &ID, } } - ID.UsedProcResUnits = UsedResourceUnits; - ID.UsedProcResGroups = UsedResourceGroups; - // A SchedWrite may specify a number of cycles in which a resource group // is reserved. For example (on target x86; cpu Haswell): // @@ -177,20 +174,13 @@ static void initializeUsedResources(InstrDesc &ID, uint64_t Mask = ProcResourceMasks[I]; if (Mask != SR.first && ((Mask & SR.first) == SR.first)) - Buffers.setBit(I); + Buffers.setBit(getResourceStateIndex(Mask)); } } - // Now set the buffers. - if (unsigned NumBuffers = Buffers.countPopulation()) { - ID.Buffers.resize(NumBuffers); - for (unsigned I = 0, E = NumProcResources; I < E && NumBuffers; ++I) { - if (Buffers[I]) { - --NumBuffers; - ID.Buffers[NumBuffers] = ProcResourceMasks[I]; - } - } - } + ID.UsedBuffers = Buffers.getZExtValue(); + ID.UsedProcResUnits = UsedResourceUnits; + ID.UsedProcResGroups = UsedResourceGroups; LLVM_DEBUG({ for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources) @@ -198,8 +188,12 @@ static void initializeUsedResources(InstrDesc &ID, << "Reserved=" << R.second.isReserved() << ", " << "#Units=" << R.second.NumUnits << ", " << "cy=" << R.second.size() << '\n'; - for (const uint64_t R : ID.Buffers) - dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n'; + uint64_t BufferIDs = ID.UsedBuffers; + while (BufferIDs) { + uint64_t Current = BufferIDs & (-BufferIDs); + dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n'; + BufferIDs ^= Current; + } dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n'; dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16) << '\n'; @@ -464,9 +458,8 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no // "unmodeledSideEffects", then this logic optimistically assumes that any - // extra register operands in the variadic sequence are not register + // extra register operand in the variadic sequence is not a register // definition. - bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() && !MCDesc.hasUnmodeledSideEffects(); for (unsigned I = 0, OpIndex = MCDesc.getNumOperands(); @@ -493,7 +486,7 @@ Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID, return ErrorSuccess(); bool UsesMemory = ID.MayLoad || ID.MayStore; - bool UsesBuffers = !ID.Buffers.empty(); + bool UsesBuffers = ID.UsedBuffers; bool UsesResources = !ID.Resources.empty(); if (!UsesMemory && !UsesBuffers && !UsesResources) return ErrorSuccess(); @@ -550,7 +543,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) { LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n'); // Create a new empty descriptor. - std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>(); + std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>(); ID->NumMicroOps = SCDesc.NumMicroOps; ID->SchedClassID = SchedClassID; @@ -619,7 +612,7 @@ InstrBuilder::createInstruction(const MCInst &MCI) { if (!DescOrErr) return DescOrErr.takeError(); const InstrDesc &D = *DescOrErr; - std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D); + std::unique_ptr<Instruction> NewIS = std::make_unique<Instruction>(D); // Check if this is a dependency breaking instruction. APInt Mask; @@ -636,8 +629,8 @@ InstrBuilder::createInstruction(const MCInst &MCI) { } // Initialize Reads first. + MCPhysReg RegID = 0; for (const ReadDescriptor &RD : D.Reads) { - int RegID = -1; if (!RD.isImplicitRead()) { // explicit read. const MCOperand &Op = MCI.getOperand(RD.OpIndex); @@ -655,7 +648,6 @@ InstrBuilder::createInstruction(const MCInst &MCI) { continue; // Okay, this is a register operand. Create a ReadState for it. - assert(RegID > 0 && "Invalid register ID found!"); NewIS->getUses().emplace_back(RD, RegID); ReadState &RS = NewIS->getUses().back(); @@ -696,8 +688,8 @@ InstrBuilder::createInstruction(const MCInst &MCI) { // Initialize writes. unsigned WriteIndex = 0; for (const WriteDescriptor &WD : D.Writes) { - unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID - : MCI.getOperand(WD.OpIndex).getReg(); + RegID = WD.isImplicitWrite() ? WD.RegisterID + : MCI.getOperand(WD.OpIndex).getReg(); // Check if this is a optional definition that references NoReg. if (WD.IsOptionalDef && !RegID) { ++WriteIndex; diff --git a/lib/MCA/Instruction.cpp b/lib/MCA/Instruction.cpp index 001842bca318..e5f2c4fd1eec 100644 --- a/lib/MCA/Instruction.cpp +++ b/lib/MCA/Instruction.cpp @@ -18,7 +18,7 @@ namespace llvm { namespace mca { -void WriteState::writeStartEvent(unsigned IID, unsigned RegID, +void WriteState::writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles) { CRD.IID = IID; CRD.RegID = RegID; @@ -27,7 +27,7 @@ void WriteState::writeStartEvent(unsigned IID, unsigned RegID, DependentWrite = nullptr; } -void ReadState::writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) { +void ReadState::writeStartEvent(unsigned IID, MCPhysReg RegID, unsigned Cycles) { assert(DependentWrites); assert(CyclesLeft == UNKNOWN_CYCLES); diff --git a/lib/MCA/Stages/DispatchStage.cpp b/lib/MCA/Stages/DispatchStage.cpp index 7334a268e9a6..3a3d82259160 100644 --- a/lib/MCA/Stages/DispatchStage.cpp +++ b/lib/MCA/Stages/DispatchStage.cpp @@ -44,7 +44,7 @@ void DispatchStage::notifyInstructionDispatched(const InstRef &IR, } bool DispatchStage::checkPRF(const InstRef &IR) const { - SmallVector<unsigned, 4> RegDefs; + SmallVector<MCPhysReg, 4> RegDefs; for (const WriteState &RegDef : IR.getInstruction()->getDefs()) RegDefs.emplace_back(RegDef.getRegisterID()); @@ -60,7 +60,7 @@ bool DispatchStage::checkPRF(const InstRef &IR) const { } bool DispatchStage::checkRCU(const InstRef &IR) const { - const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; + const unsigned NumMicroOps = IR.getInstruction()->getNumMicroOps(); if (RCU.isAvailable(NumMicroOps)) return true; notifyEvent<HWStallEvent>( @@ -79,7 +79,7 @@ Error DispatchStage::dispatch(InstRef IR) { assert(!CarryOver && "Cannot dispatch another instruction!"); Instruction &IS = *IR.getInstruction(); const InstrDesc &Desc = IS.getDesc(); - const unsigned NumMicroOps = Desc.NumMicroOps; + const unsigned NumMicroOps = IS.getNumMicroOps(); if (NumMicroOps > DispatchWidth) { assert(AvailableEntries == DispatchWidth); AvailableEntries = 0; @@ -123,9 +123,10 @@ Error DispatchStage::dispatch(InstRef IR) { for (WriteState &WS : IS.getDefs()) PRF.addRegisterWrite(WriteRef(IR.getSourceIndex(), &WS), RegisterFiles); - // Reserve slots in the RCU, and notify the instruction that it has been - // dispatched to the schedulers for execution. - IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); + // Reserve entries in the reorder buffer. + unsigned RCUTokenID = RCU.dispatch(IR); + // Notify the instruction that it has been dispatched. + IS.dispatch(RCUTokenID); // Notify listeners of the "instruction dispatched" event, // and move IR to the next stage. @@ -155,8 +156,10 @@ Error DispatchStage::cycleStart() { } bool DispatchStage::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); + const Instruction &Inst = *IR.getInstruction(); + unsigned NumMicroOps = Inst.getNumMicroOps(); + const InstrDesc &Desc = Inst.getDesc(); + unsigned Required = std::min(NumMicroOps, DispatchWidth); if (Required > AvailableEntries) return false; diff --git a/lib/MCA/Stages/EntryStage.cpp b/lib/MCA/Stages/EntryStage.cpp index d2f5613a0fb6..66135790a4cd 100644 --- a/lib/MCA/Stages/EntryStage.cpp +++ b/lib/MCA/Stages/EntryStage.cpp @@ -33,7 +33,7 @@ void EntryStage::getNextInstruction() { if (!SM.hasNext()) return; SourceRef SR = SM.peekNext(); - std::unique_ptr<Instruction> Inst = llvm::make_unique<Instruction>(SR.second); + std::unique_ptr<Instruction> Inst = std::make_unique<Instruction>(SR.second); CurrentInstruction = InstRef(SR.first, Inst.get()); Instructions.emplace_back(std::move(Inst)); SM.updateNext(); diff --git a/lib/MCA/Stages/ExecuteStage.cpp b/lib/MCA/Stages/ExecuteStage.cpp index a2b361fcd1bf..2284ed7f2816 100644 --- a/lib/MCA/Stages/ExecuteStage.cpp +++ b/lib/MCA/Stages/ExecuteStage.cpp @@ -56,12 +56,13 @@ Error ExecuteStage::issueInstruction(InstRef &IR) { SmallVector<InstRef, 4> Ready; HWS.issueInstruction(IR, Used, Pending, Ready); - NumIssuedOpcodes += IR.getInstruction()->getDesc().NumMicroOps; + Instruction &IS = *IR.getInstruction(); + NumIssuedOpcodes += IS.getNumMicroOps(); notifyReservedOrReleasedBuffers(IR, /* Reserved */ false); notifyInstructionIssued(IR, Used); - if (IR.getInstruction()->isExecuted()) { + if (IS.isExecuted()) { notifyInstructionExecuted(IR); // FIXME: add a buffer of executed instructions. if (Error S = moveToTheNextStage(IR)) @@ -199,7 +200,8 @@ Error ExecuteStage::execute(InstRef &IR) { // units have been consumed. bool IsReadyInstruction = HWS.dispatch(IR); const Instruction &Inst = *IR.getInstruction(); - NumDispatchedOpcodes += Inst.getDesc().NumMicroOps; + unsigned NumMicroOps = Inst.getNumMicroOps(); + NumDispatchedOpcodes += NumMicroOps; notifyReservedOrReleasedBuffers(IR, /* Reserved */ true); if (!IsReadyInstruction) { @@ -269,13 +271,17 @@ void ExecuteStage::notifyInstructionIssued( void ExecuteStage::notifyReservedOrReleasedBuffers(const InstRef &IR, bool Reserved) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (Desc.Buffers.empty()) + uint64_t UsedBuffers = IR.getInstruction()->getDesc().UsedBuffers; + if (!UsedBuffers) return; - SmallVector<unsigned, 4> BufferIDs(Desc.Buffers.begin(), Desc.Buffers.end()); - std::transform(Desc.Buffers.begin(), Desc.Buffers.end(), BufferIDs.begin(), - [&](uint64_t Op) { return HWS.getResourceID(Op); }); + SmallVector<unsigned, 4> BufferIDs(countPopulation(UsedBuffers), 0); + for (unsigned I = 0, E = BufferIDs.size(); I < E; ++I) { + uint64_t CurrentBufferMask = UsedBuffers & (-UsedBuffers); + BufferIDs[I] = HWS.getResourceID(CurrentBufferMask); + UsedBuffers ^= CurrentBufferMask; + } + if (Reserved) { for (HWEventListener *Listener : getListeners()) Listener->onReservedBuffers(IR, BufferIDs); diff --git a/lib/MCA/Stages/RetireStage.cpp b/lib/MCA/Stages/RetireStage.cpp index e1789dd7fa2a..f792af748bce 100644 --- a/lib/MCA/Stages/RetireStage.cpp +++ b/lib/MCA/Stages/RetireStage.cpp @@ -31,11 +31,11 @@ llvm::Error RetireStage::cycleStart() { while (!RCU.isEmpty()) { if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) break; - const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken(); + const RetireControlUnit::RUToken &Current = RCU.getCurrentToken(); if (!Current.Executed) break; - RCU.consumeCurrentToken(); notifyInstructionRetired(Current.IR); + RCU.consumeCurrentToken(); NumRetired++; } @@ -52,6 +52,10 @@ void RetireStage::notifyInstructionRetired(const InstRef &IR) const { llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles()); const Instruction &Inst = *IR.getInstruction(); + // Release the load/store queue entries. + if (Inst.isMemOp()) + LSU.onInstructionRetired(IR); + for (const WriteState &WS : Inst.getDefs()) PRF.removeRegisterWrite(WS, FreedRegs); notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs)); |