| field | value | details |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2022-02-18 22:41:20 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2022-05-14 11:45:57 +0000 |
| commit | d56accc7c3dcc897489b6a07834763a03b9f3d68 (patch) | |
| tree | 918f41a708218122215937f4ab4e68b1a942da68 | /contrib/llvm-project/llvm/lib |
| parent | 8885dff6cec52378084211fdd9366a73833eceee (diff) | |
| parent | 7eff647615f93a9aaff1997e1880b195dc3aabe6 (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib'): 30 files changed, 1388 insertions(+), 740 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 0eb6100230bd..6af5f07d801a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -148,6 +148,20 @@ static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden, cl::desc("Act like old LiveDebugValues did"), cl::init(false)); +// Limit for the maximum number of stack slots we should track, past which we +// will ignore any spills. InstrRefBasedLDV gathers detailed information on all +// stack slots which leads to high memory consumption, and in some scenarios +// (such as asan with very many locals) the working set of the function can be +// very large, causing many spills. In these scenarios, it is very unlikely that +// the developer has hundreds of variables live at the same time that they're +// carefully thinking about -- instead, they probably autogenerated the code. +// When this happens, gracefully stop tracking excess spill slots, rather than +// consuming all the developer's memory. +static cl::opt<unsigned> + StackWorkingSetLimit("livedebugvalues-max-stack-slots", cl::Hidden, + cl::desc("livedebugvalues-stack-ws-limit"), + cl::init(250)); + /// Tracker for converting machine value locations and variable values into /// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs /// specifying block live-in locations and transfers within blocks. @@ -757,9 +771,15 @@ void MLocTracker::writeRegMask(const MachineOperand *MO, unsigned CurBB, Masks.push_back(std::make_pair(MO, InstID)); } -SpillLocationNo MLocTracker::getOrTrackSpillLoc(SpillLoc L) { +Optional<SpillLocationNo> MLocTracker::getOrTrackSpillLoc(SpillLoc L) { SpillLocationNo SpillID(SpillLocs.idFor(L)); + if (SpillID.id() == 0) { + // If there is no location, and we have reached the limit of how many stack + // slots to track, then don't track this one. + if (SpillLocs.size() >= StackWorkingSetLimit) + return None; + // Spill location is untracked: create record for this one, and all // subregister slots too. SpillID = SpillLocationNo(SpillLocs.insert(L)); @@ -898,7 +918,7 @@ bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const { // void InstrRefBasedLDV::printVarLocInMBB(..) #endif -SpillLocationNo +Optional<SpillLocationNo> InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) { assert(MI.hasOneMemOperand() && "Spill instruction does not have exactly one memory operand?"); @@ -913,8 +933,11 @@ InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) { return MTracker->getOrTrackSpillLoc({Reg, Offset}); } -Optional<LocIdx> InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) { - SpillLocationNo SpillLoc = extractSpillBaseRegAndOffset(MI); +Optional<LocIdx> +InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) { + Optional<SpillLocationNo> SpillLoc = extractSpillBaseRegAndOffset(MI); + if (!SpillLoc) + return None; // Where in the stack slot is this value defined -- i.e., what size of value // is this? An important question, because it could be loaded into a register @@ -930,7 +953,7 @@ Optional<LocIdx> InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr // occur, but the safe action is to indicate the variable is optimised out. 
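The StackWorkingSetLimit guard above is a bounded-interning pattern: hand out stable IDs for spill locations, but refuse new entries once the table hits the cap. A minimal standalone sketch of that idea, with illustrative names rather than LLVM's API:

```cpp
#include <map>
#include <optional>

// Sketch only: intern keys to stable IDs, but once the table reaches a
// fixed limit, return nullopt instead of growing. The caller must then
// treat the location as untracked, mirroring the new
// Optional<SpillLocationNo> return value in the patch.
template <typename Key> class BoundedInterner {
  std::map<Key, unsigned> Table;
  unsigned Limit;

public:
  explicit BoundedInterner(unsigned Limit) : Limit(Limit) {}

  std::optional<unsigned> getOrTrack(const Key &K) {
    auto It = Table.find(K);
    if (It != Table.end())
      return It->second;               // already tracked
    if (Table.size() >= Limit)
      return std::nullopt;             // working-set limit hit: refuse
    unsigned ID = (unsigned)Table.size() + 1; // IDs start at 1, like idFor
    Table.emplace(K, ID);
    return ID;
  }
};
```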
return None; - unsigned SpillID = MTracker->getSpillIDWithIdx(SpillLoc, IdxIt->second); + unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillLoc, IdxIt->second); return MTracker->getSpillMLoc(SpillID); } @@ -1006,7 +1029,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, // Only handle this instruction when we are building the variable value // transfer function. - if (!VTracker) + if (!VTracker && !TTracker) return false; unsigned InstNo = MI.getOperand(0).getImm(); @@ -1162,7 +1185,8 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, // for DBG_INSTR_REFs as DBG_VALUEs (just, the former can refer to values that // aren't immediately available). DbgValueProperties Properties(Expr, false); - VTracker->defVar(MI, Properties, NewID); + if (VTracker) + VTracker->defVar(MI, Properties, NewID); // If we're on the final pass through the function, decompose this INSTR_REF // into a plain DBG_VALUE. @@ -1251,7 +1275,12 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) { Register Base; StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base); SpillLoc SL = {Base, Offs}; - SpillLocationNo SpillNo = MTracker->getOrTrackSpillLoc(SL); + Optional<SpillLocationNo> SpillNo = MTracker->getOrTrackSpillLoc(SL); + + // We might be able to find a value, but have chosen not to, to avoid + // tracking too much stack information. + if (!SpillNo) + return true; // Problem: what value should we extract from the stack? LLVM does not // record what size the last store to the slot was, and it would become @@ -1263,7 +1292,7 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) { Optional<ValueIDNum> Result = None; Optional<LocIdx> SpillLoc = None; for (unsigned CS : CandidateSizes) { - unsigned SpillID = MTracker->getLocID(SpillNo, {CS, 0}); + unsigned SpillID = MTracker->getLocID(*SpillNo, {CS, 0}); SpillLoc = MTracker->getSpillMLoc(SpillID); ValueIDNum Val = MTracker->readMLoc(*SpillLoc); // If this value was defined in it's own position, then it was probably @@ -1280,7 +1309,7 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) { // "supposed" to be is more complex, and benefits a small number of // locations. if (!Result) { - unsigned SpillID = MTracker->getLocID(SpillNo, {64, 0}); + unsigned SpillID = MTracker->getLocID(*SpillNo, {64, 0}); SpillLoc = MTracker->getSpillMLoc(SpillID); Result = MTracker->readMLoc(*SpillLoc); } @@ -1357,11 +1386,12 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { // If this instruction writes to a spill slot, def that slot. if (hasFoldedStackStore(MI)) { - SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI); - for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) { - unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I); - LocIdx L = MTracker->getSpillMLoc(SpillID); - MTracker->setMLoc(L, ValueIDNum(CurBB, CurInst, L)); + if (Optional<SpillLocationNo> SpillNo = extractSpillBaseRegAndOffset(MI)) { + for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) { + unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillNo, I); + LocIdx L = MTracker->getSpillMLoc(SpillID); + MTracker->setMLoc(L, ValueIDNum(CurBB, CurInst, L)); + } } } @@ -1398,11 +1428,12 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { // Tell TTracker about any folded stack store. 
if (hasFoldedStackStore(MI)) { - SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI); - for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) { - unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I); - LocIdx L = MTracker->getSpillMLoc(SpillID); - TTracker->clobberMloc(L, MI.getIterator(), true); + if (Optional<SpillLocationNo> SpillNo = extractSpillBaseRegAndOffset(MI)) { + for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) { + unsigned SpillID = MTracker->getSpillIDWithIdx(*SpillNo, I); + LocIdx L = MTracker->getSpillMLoc(SpillID); + TTracker->clobberMloc(L, MI.getIterator(), true); + } } } } @@ -1438,23 +1469,24 @@ void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) { } } -bool InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI, - MachineFunction *MF) { +Optional<SpillLocationNo> +InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI, + MachineFunction *MF) { // TODO: Handle multiple stores folded into one. if (!MI.hasOneMemOperand()) - return false; + return None; // Reject any memory operand that's aliased -- we can't guarantee its value. auto MMOI = MI.memoperands_begin(); const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue(); if (PVal->isAliased(MFI)) - return false; + return None; if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII)) - return false; // This is not a spill instruction, since no valid size was - // returned from either function. + return None; // This is not a spill instruction, since no valid size was + // returned from either function. - return true; + return extractSpillBaseRegAndOffset(MI); } bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI, @@ -1511,13 +1543,11 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { // First, if there are any DBG_VALUEs pointing at a spill slot that is // written to, terminate that variable location. The value in memory // will have changed. DbgEntityHistoryCalculator doesn't try to detect this. - if (isSpillInstruction(MI, MF)) { - SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI); - + if (Optional<SpillLocationNo> Loc = isSpillInstruction(MI, MF)) { // Un-set this location and clobber, so that earlier locations don't // continue past this store. for (unsigned SlotIdx = 0; SlotIdx < MTracker->NumSlotIdxes; ++SlotIdx) { - unsigned SpillID = MTracker->getSpillIDWithIdx(Loc, SlotIdx); + unsigned SpillID = MTracker->getSpillIDWithIdx(*Loc, SlotIdx); Optional<LocIdx> MLoc = MTracker->getSpillMLoc(SpillID); if (!MLoc) continue; @@ -1535,7 +1565,9 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { // Try to recognise spill and restore instructions that may transfer a value. if (isLocationSpill(MI, MF, Reg)) { - SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI); + // isLocationSpill returning true should guarantee we can extract a + // location. + SpillLocationNo Loc = *extractSpillBaseRegAndOffset(MI); auto DoTransfer = [&](Register SrcReg, unsigned SpillID) { auto ReadValue = MTracker->readReg(SrcReg); @@ -1562,10 +1594,9 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { unsigned SpillID = MTracker->getLocID(Loc, {Size, 0}); DoTransfer(Reg, SpillID); } else { - Optional<SpillLocationNo> OptLoc = isRestoreInstruction(MI, MF, Reg); - if (!OptLoc) + Optional<SpillLocationNo> Loc = isRestoreInstruction(MI, MF, Reg); + if (!Loc) return false; - SpillLocationNo Loc = *OptLoc; // Assumption: we're reading from the base of the stack slot, not some // offset into it. 
It seems very unlikely LLVM would ever generate @@ -1592,13 +1623,13 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) { for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) { unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI); - unsigned SpillID = MTracker->getLocID(Loc, Subreg); + unsigned SpillID = MTracker->getLocID(*Loc, Subreg); DoTransfer(*SRI, SpillID); } // Directly look up this registers slot idx by size, and transfer. unsigned Size = TRI->getRegSizeInBits(Reg, *MRI); - unsigned SpillID = MTracker->getLocID(Loc, {Size, 0}); + unsigned SpillID = MTracker->getLocID(*Loc, {Size, 0}); DoTransfer(Reg, SpillID); } return true; @@ -2765,6 +2796,11 @@ void InstrRefBasedLDV::placePHIsForSingleVarDefinition( auto ValueIt = VLocs.Vars.find(Var); const DbgValue &Value = ValueIt->second; + // If it's an explicit assignment of "undef", that means there is no location + // anyway, anywhere. + if (Value.Kind == DbgValue::Undef) + return; + // Assign the variable value to entry to each dominated block that's in scope. // Skip the definition block -- it's assigned the variable value in the middle // of the block somewhere. @@ -2790,35 +2826,6 @@ void InstrRefBasedLDV::dump_mloc_transfer( } #endif -void InstrRefBasedLDV::emitLocations( - MachineFunction &MF, LiveInsT SavedLiveIns, ValueIDNum **MOutLocs, - ValueIDNum **MInLocs, DenseMap<DebugVariable, unsigned> &AllVarsNumbering, - const TargetPassConfig &TPC) { - TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs, TPC); - unsigned NumLocs = MTracker->getNumLocs(); - - // For each block, load in the machine value locations and variable value - // live-ins, then step through each instruction in the block. New DBG_VALUEs - // to be inserted will be created along the way. - for (MachineBasicBlock &MBB : MF) { - unsigned bbnum = MBB.getNumber(); - MTracker->reset(); - MTracker->loadFromArray(MInLocs[bbnum], bbnum); - TTracker->loadInlocs(MBB, MInLocs[bbnum], SavedLiveIns[MBB.getNumber()], - NumLocs); - - CurBB = bbnum; - CurInst = 1; - for (auto &MI : MBB) { - process(MI, MOutLocs, MInLocs); - TTracker->checkInstForNewValues(CurInst, MI.getIterator()); - ++CurInst; - } - } - - emitTransfers(AllVarsNumbering); -} - void InstrRefBasedLDV::initialSetup(MachineFunction &MF) { // Build some useful data structures. @@ -2861,8 +2868,192 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) { #endif } +// Produce an "ejection map" for blocks, i.e., what's the highest-numbered +// lexical scope it's used in. When exploring in DFS order and we pass that +// scope, the block can be processed and any tracking information freed. +void InstrRefBasedLDV::makeDepthFirstEjectionMap( + SmallVectorImpl<unsigned> &EjectionMap, + const ScopeToDILocT &ScopeToDILocation, + ScopeToAssignBlocksT &ScopeToAssignBlocks) { + SmallPtrSet<const MachineBasicBlock *, 8> BlocksToExplore; + SmallVector<std::pair<LexicalScope *, ssize_t>, 4> WorkStack; + auto *TopScope = LS.getCurrentFunctionScope(); + + // Unlike lexical scope explorers, we explore in reverse order, to find the + // "last" lexical scope used for each block early. + WorkStack.push_back({TopScope, TopScope->getChildren().size() - 1}); + + while (!WorkStack.empty()) { + auto &ScopePosition = WorkStack.back(); + LexicalScope *WS = ScopePosition.first; + ssize_t ChildNum = ScopePosition.second--; + + const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren(); + if (ChildNum >= 0) { + // If ChildNum is positive, there are remaining children to explore. 
+ // Push the child and its children-count onto the stack. + auto &ChildScope = Children[ChildNum]; + WorkStack.push_back( + std::make_pair(ChildScope, ChildScope->getChildren().size() - 1)); + } else { + WorkStack.pop_back(); + + // We've explored all children and any later blocks: examine all blocks + // in our scope. If they haven't yet had an ejection number set, then + // this scope will be the last to use that block. + auto DILocationIt = ScopeToDILocation.find(WS); + if (DILocationIt != ScopeToDILocation.end()) { + getBlocksForScope(DILocationIt->second, BlocksToExplore, + ScopeToAssignBlocks.find(WS)->second); + for (auto *MBB : BlocksToExplore) { + unsigned BBNum = MBB->getNumber(); + if (EjectionMap[BBNum] == 0) + EjectionMap[BBNum] = WS->getDFSOut(); + } + + BlocksToExplore.clear(); + } + } + } +} + +bool InstrRefBasedLDV::depthFirstVLocAndEmit( + unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation, + const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToAssignBlocks, + LiveInsT &Output, ValueIDNum **MOutLocs, ValueIDNum **MInLocs, + SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF, + DenseMap<DebugVariable, unsigned> &AllVarsNumbering, + const TargetPassConfig &TPC) { + TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs, TPC); + unsigned NumLocs = MTracker->getNumLocs(); + VTracker = nullptr; + + // No scopes? No variable locations. + if (!LS.getCurrentFunctionScope()) { + // FIXME: this is a sticking plaster to prevent a memory leak, these + // pointers will be automagically freed by being unique pointers, shortly. + for (unsigned int I = 0; I < MaxNumBlocks; ++I) { + delete[] MInLocs[I]; + delete[] MOutLocs[I]; + } + return false; + } + + // Build map from block number to the last scope that uses the block. + SmallVector<unsigned, 16> EjectionMap; + EjectionMap.resize(MaxNumBlocks, 0); + makeDepthFirstEjectionMap(EjectionMap, ScopeToDILocation, + ScopeToAssignBlocks); + + // Helper lambda for ejecting a block -- if nothing is going to use the block, + // we can translate the variable location information into DBG_VALUEs and then + // free all of InstrRefBasedLDV's data structures. + auto EjectBlock = [&](MachineBasicBlock &MBB) -> void { + unsigned BBNum = MBB.getNumber(); + AllTheVLocs[BBNum].clear(); + + // Prime the transfer-tracker, and then step through all the block + // instructions, installing transfers. + MTracker->reset(); + MTracker->loadFromArray(MInLocs[BBNum], BBNum); + TTracker->loadInlocs(MBB, MInLocs[BBNum], Output[BBNum], NumLocs); + + CurBB = BBNum; + CurInst = 1; + for (auto &MI : MBB) { + process(MI, MOutLocs, MInLocs); + TTracker->checkInstForNewValues(CurInst, MI.getIterator()); + ++CurInst; + } + + // Free machine-location tables for this block. + delete[] MInLocs[BBNum]; + delete[] MOutLocs[BBNum]; + // Make ourselves brittle to use-after-free errors. + MInLocs[BBNum] = nullptr; + MOutLocs[BBNum] = nullptr; + // We don't need live-in variable values for this block either. + Output[BBNum].clear(); + AllTheVLocs[BBNum].clear(); + }; + + SmallPtrSet<const MachineBasicBlock *, 8> BlocksToExplore; + SmallVector<std::pair<LexicalScope *, ssize_t>, 4> WorkStack; + WorkStack.push_back({LS.getCurrentFunctionScope(), 0}); + unsigned HighestDFSIn = 0; + + // Proceed to explore in depth first order. 
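As a compact model of the ejection-map construction shown above (simplified stand-in types, not the LLVM classes): children are visited in reverse order so that, for any block used by several sibling scopes, the scope that finishes with it last in the forward walk claims it first, and its DFS-out number marks when the block can be freed.

```cpp
#include <cstddef>
#include <utility>
#include <vector>

// Illustrative sketch of makeDepthFirstEjectionMap's shape.
struct Scope {
  unsigned DFSOut;                // post-order (DFS-out) number
  std::vector<Scope *> Children;
  std::vector<unsigned> Blocks;   // blocks this scope assigns in
};

void buildEjectionMap(Scope *Root, std::vector<unsigned> &EjectAt) {
  std::vector<std::pair<Scope *, std::ptrdiff_t>> Stack;
  Stack.push_back({Root, (std::ptrdiff_t)Root->Children.size() - 1});
  while (!Stack.empty()) {
    Scope *S = Stack.back().first;
    std::ptrdiff_t ChildIdx = Stack.back().second--;
    if (ChildIdx >= 0) {
      // Descend into the next child, last sibling first.
      Scope *C = S->Children[ChildIdx];
      Stack.push_back({C, (std::ptrdiff_t)C->Children.size() - 1});
    } else {
      // All children seen: any block not yet claimed is last used here.
      for (unsigned B : S->Blocks)
        if (EjectAt[B] == 0)
          EjectAt[B] = S->DFSOut;
      Stack.pop_back();
    }
  }
}
```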
+  while (!WorkStack.empty()) {
+    auto &ScopePosition = WorkStack.back();
+    LexicalScope *WS = ScopePosition.first;
+    ssize_t ChildNum = ScopePosition.second++;
+
+    // We observe scopes with children twice here, once descending in, once
+    // ascending out of the scope nest. Use HighestDFSIn as a ratchet to ensure
+    // we don't process a scope twice. Additionally, ignore scopes that don't
+    // have a DILocation -- by proxy, this means we never tracked any variable
+    // assignments in that scope.
+    auto DILocIt = ScopeToDILocation.find(WS);
+    if (HighestDFSIn <= WS->getDFSIn() && DILocIt != ScopeToDILocation.end()) {
+      const DILocation *DILoc = DILocIt->second;
+      auto &VarsWeCareAbout = ScopeToVars.find(WS)->second;
+      auto &BlocksInScope = ScopeToAssignBlocks.find(WS)->second;
+
+      buildVLocValueMap(DILoc, VarsWeCareAbout, BlocksInScope, Output, MOutLocs,
+                        MInLocs, AllTheVLocs);
+    }
+
+    HighestDFSIn = std::max(HighestDFSIn, WS->getDFSIn());
+
+    // Descend into any scope nests.
+    const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren();
+    if (ChildNum < (ssize_t)Children.size()) {
+      // There are children to explore -- push onto stack and continue.
+      auto &ChildScope = Children[ChildNum];
+      WorkStack.push_back(std::make_pair(ChildScope, 0));
+    } else {
+      WorkStack.pop_back();
+
+      // We've explored a leaf, or have explored all the children of a scope.
+      // Try to eject any blocks where this is the last scope it's relevant to.
+      auto DILocationIt = ScopeToDILocation.find(WS);
+      if (DILocationIt == ScopeToDILocation.end())
+        continue;
+
+      getBlocksForScope(DILocationIt->second, BlocksToExplore,
+                        ScopeToAssignBlocks.find(WS)->second);
+      for (auto *MBB : BlocksToExplore)
+        if (WS->getDFSOut() == EjectionMap[MBB->getNumber()])
+          EjectBlock(const_cast<MachineBasicBlock &>(*MBB));
+
+      BlocksToExplore.clear();
+    }
+  }
+
+  // Some artificial blocks may not have been ejected, meaning they're not
+  // connected to an actual legitimate scope. This can technically happen
+  // with things like the entry block. In theory, we shouldn't need to do
+  // anything for such out-of-scope blocks, but for the sake of being similar
+  // to VarLocBasedLDV, eject these too.
+  for (auto *MBB : ArtificialBlocks)
+    if (MOutLocs[MBB->getNumber()])
+      EjectBlock(*MBB);
+
+  // Finally, there might have been gaps in the block numbering, from dead
+  // blocks being deleted or folded. In those scenarios, we might allocate a
+  // block-table that's never ejected, meaning we have to free it at the end.
+  for (unsigned int I = 0; I < MaxNumBlocks; ++I) {
+    if (MInLocs[I]) {
+      delete[] MInLocs[I];
+      delete[] MOutLocs[I];
+    }
+  }
+
+  return emitTransfers(AllVarsNumbering);
+}
+
 bool InstrRefBasedLDV::emitTransfers(
-    DenseMap<DebugVariable, unsigned> &AllVarsNumbering) {
+    DenseMap<DebugVariable, unsigned> &AllVarsNumbering) {
   // Go through all the transfers recorded in the TransferTracker -- this is
   // both the live-ins to a block, and any movements of values that happen
   // in the middle.
@@ -3050,31 +3241,22 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
                       << " has " << MaxNumBlocks << " basic blocks and "
                       << VarAssignCount
                       << " variable assignments, exceeding limits.\n");
-  } else {
-    // Compute the extended ranges, iterating over scopes. There might be
-    // something to be said for ordering them by size/locality, but that's for
-    // the future. For each scope, solve the variable value problem, producing
-    // a map of variables to values in SavedLiveIns.
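The FIXME above notes that these manual delete[] sweeps are a stopgap until the tables become unique pointers. A hypothetical sketch of the ownership change it hints at (not the code in this commit):

```cpp
#include <cstdint>
#include <memory>
#include <vector>

struct ValueIDNum { uint64_t Value; }; // simplified stand-in

// If each per-block value table owned its storage, "ejecting" a block
// would be a reset() and the trailing cleanup loops would disappear.
struct FuncValueTables {
  std::vector<std::unique_ptr<ValueIDNum[]>> MInLocs, MOutLocs;

  FuncValueTables(unsigned MaxNumBlocks, unsigned NumLocs)
      : MInLocs(MaxNumBlocks), MOutLocs(MaxNumBlocks) {
    for (unsigned I = 0; I < MaxNumBlocks; ++I) {
      MInLocs[I] = std::make_unique<ValueIDNum[]>(NumLocs);
      MOutLocs[I] = std::make_unique<ValueIDNum[]>(NumLocs);
    }
  }

  void ejectBlock(unsigned BBNum) {
    MInLocs[BBNum].reset();  // frees the table; pointer becomes null
    MOutLocs[BBNum].reset();
  }
  // No explicit cleanup: vector and unique_ptr free whatever remains.
};
```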
- for (auto &P : ScopeToVars) { - buildVLocValueMap(ScopeToDILocation[P.first], P.second, - ScopeToAssignBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs, - vlocs); - } - - // Using the computed value locations and variable values for each block, - // create the DBG_VALUE instructions representing the extended variable - // locations. - emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC); - // Did we actually make any changes? If we created any DBG_VALUEs, then yes. - Changed = TTracker->Transfers.size() != 0; + // Perform memory cleanup that emitLocations would do otherwise. + for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) { + delete[] MOutLocs[Idx]; + delete[] MInLocs[Idx]; + } + } else { + // Optionally, solve the variable value problem and emit to blocks by using + // a lexical-scope-depth search. It should be functionally identical to + // the "else" block of this condition. + Changed = depthFirstVLocAndEmit( + MaxNumBlocks, ScopeToDILocation, ScopeToVars, ScopeToAssignBlocks, + SavedLiveIns, MOutLocs, MInLocs, vlocs, MF, AllVarsNumbering, *TPC); } - // Common clean-up of memory. - for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) { - delete[] MOutLocs[Idx]; - delete[] MInLocs[Idx]; - } + // Elements of these arrays will be deleted by emitLocations. delete[] MOutLocs; delete[] MInLocs; @@ -3092,6 +3274,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, DebugPHINumToValue.clear(); OverlapFragments.clear(); SeenFragments.clear(); + SeenDbgPHIs.clear(); return Changed; } @@ -3357,6 +3540,21 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF, ValueIDNum **MLiveIns, MachineInstr &Here, uint64_t InstrNum) { + // This function will be called twice per DBG_INSTR_REF, and might end up + // computing lots of SSA information: memoize it. + auto SeenDbgPHIIt = SeenDbgPHIs.find(&Here); + if (SeenDbgPHIIt != SeenDbgPHIs.end()) + return SeenDbgPHIIt->second; + + Optional<ValueIDNum> Result = + resolveDbgPHIsImpl(MF, MLiveOuts, MLiveIns, Here, InstrNum); + SeenDbgPHIs.insert({&Here, Result}); + return Result; +} + +Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( + MachineFunction &MF, ValueIDNum **MLiveOuts, ValueIDNum **MLiveIns, + MachineInstr &Here, uint64_t InstrNum) { // Pick out records of DBG_PHI instructions that have been observed. If there // are none, then we cannot compute a value number. auto RangePair = std::equal_range(DebugPHINumToValue.begin(), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h index e7383209c027..d778561db471 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -616,7 +616,9 @@ public: void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID); /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked. - SpillLocationNo getOrTrackSpillLoc(SpillLoc L); + /// Returns None when in scenarios where a spill slot could be tracked, but + /// we would likely run into resource limitations. + Optional<SpillLocationNo> getOrTrackSpillLoc(SpillLoc L); // Get LocIdx of a spill ID. LocIdx getSpillMLoc(unsigned SpillID) { @@ -678,7 +680,7 @@ public: /// movement of values between locations inside of a block is handled at a /// much later stage, in the TransferTracker class. 
MapVector<DebugVariable, DbgValue> Vars; - DenseMap<DebugVariable, const DILocation *> Scopes; + SmallDenseMap<DebugVariable, const DILocation *, 8> Scopes; MachineBasicBlock *MBB = nullptr; const OverlapMap &OverlappingFragments; DbgValueProperties EmptyProperties; @@ -747,6 +749,11 @@ public: Scopes[Overlapped] = Loc; } } + + void clear() { + Vars.clear(); + Scopes.clear(); + } }; // XXX XXX docs @@ -862,6 +869,12 @@ private: OverlapMap OverlapFragments; VarToFragments SeenFragments; + /// Mapping of DBG_INSTR_REF instructions to their values, for those + /// DBG_INSTR_REFs that call resolveDbgPHIs. These variable references solve + /// a mini SSA problem caused by DBG_PHIs being cloned, this collection caches + /// the result. + DenseMap<MachineInstr *, Optional<ValueIDNum>> SeenDbgPHIs; + /// True if we need to examine call instructions for stack clobbers. We /// normally assume that they don't clobber SP, but stack probes on Windows /// do. @@ -873,7 +886,8 @@ private: StringRef StackProbeSymbolName; /// Tests whether this instruction is a spill to a stack slot. - bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF); + Optional<SpillLocationNo> isSpillInstruction(const MachineInstr &MI, + MachineFunction *MF); /// Decide if @MI is a spill instruction and return true if it is. We use 2 /// criteria to make this decision: @@ -891,7 +905,8 @@ private: /// Given a spill instruction, extract the spill slot information, ensure it's /// tracked, and return the spill number. - SpillLocationNo extractSpillBaseRegAndOffset(const MachineInstr &MI); + Optional<SpillLocationNo> + extractSpillBaseRegAndOffset(const MachineInstr &MI); /// Observe a single instruction while stepping through a block. void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr, @@ -940,6 +955,12 @@ private: ValueIDNum **MLiveIns, MachineInstr &Here, uint64_t InstrNum); + Optional<ValueIDNum> resolveDbgPHIsImpl(MachineFunction &MF, + ValueIDNum **MLiveOuts, + ValueIDNum **MLiveIns, + MachineInstr &Here, + uint64_t InstrNum); + /// Step through the function, recording register definitions and movements /// in an MLocTracker. Convert the observations into a per-block transfer /// function in \p MLocTransfer, suitable for using with the machine value @@ -1050,18 +1071,6 @@ private: const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs, const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders); - /// Given the solutions to the two dataflow problems, machine value locations - /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the - /// TransferTracker class over the function to produce live-in and transfer - /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the - /// order given by AllVarsNumbering -- this could be any stable order, but - /// right now "order of appearence in function, when explored in RPO", so - /// that we can compare explictly against VarLocBasedImpl. - void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns, - ValueIDNum **MOutLocs, ValueIDNum **MInLocs, - DenseMap<DebugVariable, unsigned> &AllVarsNumbering, - const TargetPassConfig &TPC); - /// Take collections of DBG_VALUE instructions stored in TTracker, and /// install them into their output blocks. Preserves a stable order of /// DBG_VALUEs produced (which would otherwise cause nondeterminism) through @@ -1072,6 +1081,28 @@ private: /// RPOT block ordering. 
  void initialSetup(MachineFunction &MF);
 
+  /// Produce a map of the last lexical scope that uses a block, using the
+  /// scope's DFSOut number. Mapping is block-number to DFSOut.
+  /// \p EjectionMap Pre-allocated vector in which to install the built map.
+  /// \p ScopeToDILocation Mapping of LexicalScopes to their DILocations.
+  /// \p AssignBlocks Map of blocks where assignments happen for a scope.
+  void makeDepthFirstEjectionMap(SmallVectorImpl<unsigned> &EjectionMap,
+                                 const ScopeToDILocT &ScopeToDILocation,
+                                 ScopeToAssignBlocksT &AssignBlocks);
+
+  /// When determining per-block variable values and emitting to DBG_VALUEs,
+  /// this function explores by lexical scope depth. Doing so means that per
+  /// block information can be fully computed before exploration finishes,
+  /// allowing us to emit it and free data structures earlier than otherwise.
+  /// It's also good for locality.
+  bool depthFirstVLocAndEmit(
+      unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation,
+      const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToBlocks,
+      LiveInsT &Output, ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+      SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF,
+      DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+      const TargetPassConfig &TPC);
+
  bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
                    TargetPassConfig *TPC, unsigned InputBBLimit,
                    unsigned InputDbgValLimit) override;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 8f697611a82c..40770b15aa35 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -123,6 +123,11 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
 }
 
 bool llvm::debuginfoShouldUseDebugInstrRef(const Triple &T) {
+  // Enable by default on x86_64, disable if explicitly turned off on cmdline.
+  if (T.getArch() == llvm::Triple::x86_64 &&
+      ValueTrackingVariableLocations != cl::boolOrDefault::BOU_FALSE)
+    return true;
+
   // Enable if explicitly requested on command line.
   return ValueTrackingVariableLocations == cl::boolOrDefault::BOU_TRUE;
 }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 041d7e5b4a4a..ec297579090e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2101,10 +2101,80 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
                                    VT.getTypeForEVT(*DAG.getContext()), AS);
 }
 
+/// This inverts a canonicalization in IR that replaces a variable select arm
+/// with an identity constant. Codegen improves if we re-use the variable
+/// operand rather than load a constant. This can also be converted into a
+/// masked vector operation if the target supports it.
+static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
+                                              bool ShouldCommuteOperands) {
+  // Match a select as operand 1. The identity constant that we are looking for
+  // is only valid as operand 1 of a non-commutative binop.
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  if (ShouldCommuteOperands)
+    std::swap(N0, N1);
+
+  // TODO: Should this apply to scalar select too?
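A scalar model of the rewrite this function performs may help; the real combine operates on vector nodes and freezes N0 because its use count grows, but per lane it amounts to:

```cpp
// x + (c ? -0.0f : y)  -->  c ? x : (x + y)
//
// When c is true both forms yield x, because adding -0.0f is the FADD
// identity; when c is false both yield x + y. The rewritten form no
// longer materializes the constant arm and can lower to a masked add.
float foldExample(bool c, float x, float y) {
  float Binop = x + y;   // binop applied to the variable arm only
  return c ? x : Binop;  // select picks the untouched operand or result
}
```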
+ if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT) + return SDValue(); + + unsigned Opcode = N->getOpcode(); + EVT VT = N->getValueType(0); + SDValue Cond = N1.getOperand(0); + SDValue TVal = N1.getOperand(1); + SDValue FVal = N1.getOperand(2); + + // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity(). + // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()". + // TODO: With fast-math (NSZ), allow the opposite-sign form of zero? + auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) { + if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) { + switch (Opcode) { + case ISD::FADD: // X + -0.0 --> X + return C->isZero() && C->isNegative(); + case ISD::FSUB: // X - 0.0 --> X + return C->isZero() && !C->isNegative(); + case ISD::FMUL: // X * 1.0 --> X + case ISD::FDIV: // X / 1.0 --> X + return C->isExactlyValue(1.0); + } + } + return false; + }; + + // This transform increases uses of N0, so freeze it to be safe. + // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal) + if (isIdentityConstantForOpcode(Opcode, TVal)) { + SDValue F0 = DAG.getFreeze(N0); + SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags()); + return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO); + } + // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0 + if (isIdentityConstantForOpcode(Opcode, FVal)) { + SDValue F0 = DAG.getFreeze(N0); + SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags()); + return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0); + } + + return SDValue(); +} + SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 && "Unexpected binary operator"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto BinOpcode = BO->getOpcode(); + EVT VT = BO->getValueType(0); + if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) { + if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false)) + return Sel; + + if (TLI.isCommutativeBinOp(BO->getOpcode())) + if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true)) + return Sel; + } + // Don't do this unless the old select is going away. We want to eliminate the // binary operator, not replace a binop with a select. // TODO: Handle ISD::SELECT_CC. @@ -2133,7 +2203,6 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { // propagate non constant operands into select. I.e.: // and (select Cond, 0, -1), X --> select Cond, 0, X // or X, (select Cond, -1, 0) --> select Cond, -1, X - auto BinOpcode = BO->getOpcode(); bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) && (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) && @@ -2145,8 +2214,6 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { !DAG.isConstantFPBuildVectorOrConstantFP(CBO)) return SDValue(); - EVT VT = BO->getValueType(0); - // We have a select-of-constants followed by a binary operator with a // constant. Eliminate the binop by pulling the constant math into the select. 
// Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp new file mode 100644 index 000000000000..0493fcd3cbc5 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp @@ -0,0 +1,58 @@ +//===-- lib/DebugInfo/Symbolize/DIFetcher.cpp -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the implementation of the local debug info fetcher, which +/// searches cache directories. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/Symbolize/DIFetcher.h" + +#include "llvm/Debuginfod/Debuginfod.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" + +namespace llvm { +namespace symbolize { + +Optional<std::string> +LocalDIFetcher::fetchBuildID(ArrayRef<uint8_t> BuildID) const { + auto GetDebugPath = [&](StringRef Directory) { + SmallString<128> Path{Directory}; + sys::path::append(Path, ".build-id", + llvm::toHex(BuildID[0], /*LowerCase=*/true), + llvm::toHex(BuildID.slice(1), /*LowerCase=*/true)); + Path += ".debug"; + return Path; + }; + if (DebugFileDirectory.empty()) { + SmallString<128> Path = GetDebugPath( +#if defined(__NetBSD__) + // Try /usr/libdata/debug/.build-id/../... + "/usr/libdata/debug" +#else + // Try /usr/lib/debug/.build-id/../... + "/usr/lib/debug" +#endif + ); + if (llvm::sys::fs::exists(Path)) + return std::string(Path); + } else { + for (const auto &Directory : DebugFileDirectory) { + // Try <debug-file-directory>/.build-id/../... 
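Both branches of fetchBuildID build the same path shape: the first byte of the build ID names a subdirectory under .build-id and the remaining bytes name the file. A self-contained illustration, using a hypothetical helper that is not part of the patch:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// <dir>/.build-id/<first byte>/<remaining bytes>.debug
std::string buildIDPath(const std::string &Dir,
                        const std::vector<uint8_t> &ID) {
  static const char Hex[] = "0123456789abcdef";
  std::string Path = Dir + "/.build-id/";
  for (std::size_t I = 0; I < ID.size(); ++I) {
    Path += Hex[ID[I] >> 4];
    Path += Hex[ID[I] & 0xf];
    if (I == 0)
      Path += '/'; // the first byte is its own directory level
  }
  return Path + ".debug";
}

int main() {
  // Made-up four-byte build ID; real GNU build IDs are typically 20 bytes.
  std::printf("%s\n",
              buildIDPath("/usr/lib/debug", {0xab, 0xcd, 0xef, 0x12}).c_str());
  // Prints: /usr/lib/debug/.build-id/ab/cdef12.debug
}
```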
+ SmallString<128> Path = GetDebugPath(Directory); + if (llvm::sys::fs::exists(Path)) + return std::string(Path); + } + } + return None; +} + +} // namespace symbolize +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp index 7834423bbc25..0bfe00ee9ba7 100644 --- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -20,7 +20,7 @@ #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/PDB/PDB.h" #include "llvm/DebugInfo/PDB/PDBContext.h" -#include "llvm/Debuginfod/Debuginfod.h" +#include "llvm/DebugInfo/Symbolize/DIFetcher.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Object/COFF.h" #include "llvm/Object/MachO.h" @@ -230,51 +230,6 @@ bool checkFileCRC(StringRef Path, uint32_t CRCHash) { return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer())); } -bool findDebugBinary(const std::string &OrigPath, - const std::string &DebuglinkName, uint32_t CRCHash, - const std::string &FallbackDebugPath, - std::string &Result) { - SmallString<16> OrigDir(OrigPath); - llvm::sys::path::remove_filename(OrigDir); - SmallString<16> DebugPath = OrigDir; - // Try relative/path/to/original_binary/debuglink_name - llvm::sys::path::append(DebugPath, DebuglinkName); - if (checkFileCRC(DebugPath, CRCHash)) { - Result = std::string(DebugPath.str()); - return true; - } - // Try relative/path/to/original_binary/.debug/debuglink_name - DebugPath = OrigDir; - llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); - if (checkFileCRC(DebugPath, CRCHash)) { - Result = std::string(DebugPath.str()); - return true; - } - // Make the path absolute so that lookups will go to - // "/usr/lib/debug/full/path/to/debug", not - // "/usr/lib/debug/to/debug" - llvm::sys::fs::make_absolute(OrigDir); - if (!FallbackDebugPath.empty()) { - // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name - DebugPath = FallbackDebugPath; - } else { -#if defined(__NetBSD__) - // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name - DebugPath = "/usr/libdata/debug"; -#else - // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name - DebugPath = "/usr/lib/debug"; -#endif - } - llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), - DebuglinkName); - if (checkFileCRC(DebugPath, CRCHash)) { - Result = std::string(DebugPath.str()); - return true; - } - return false; -} - bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, uint32_t &CRCHash) { if (!Obj) @@ -351,50 +306,6 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) { return BuildID; } -bool findDebugBinary(const std::vector<std::string> &DebugFileDirectory, - const ArrayRef<uint8_t> BuildID, std::string &Result) { - auto getDebugPath = [&](StringRef Directory) { - SmallString<128> Path{Directory}; - sys::path::append(Path, ".build-id", - llvm::toHex(BuildID[0], /*LowerCase=*/true), - llvm::toHex(BuildID.slice(1), /*LowerCase=*/true)); - Path += ".debug"; - return Path; - }; - if (DebugFileDirectory.empty()) { - SmallString<128> Path = getDebugPath( -#if defined(__NetBSD__) - // Try /usr/libdata/debug/.build-id/../... - "/usr/libdata/debug" -#else - // Try /usr/lib/debug/.build-id/../... 
- "/usr/lib/debug" -#endif - ); - if (llvm::sys::fs::exists(Path)) { - Result = std::string(Path.str()); - return true; - } - } else { - for (const auto &Directory : DebugFileDirectory) { - // Try <debug-file-directory>/.build-id/../... - SmallString<128> Path = getDebugPath(Directory); - if (llvm::sys::fs::exists(Path)) { - Result = std::string(Path.str()); - return true; - } - } - } - // Try debuginfod client cache and known servers. - Expected<std::string> PathOrErr = getCachedOrDownloadDebuginfo(BuildID); - if (!PathOrErr) { - consumeError(PathOrErr.takeError()); - return false; - } - Result = *PathOrErr; - return true; -} - } // end anonymous namespace ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, @@ -437,8 +348,7 @@ ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, std::string DebugBinaryPath; if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash)) return nullptr; - if (!findDebugBinary(Path, DebuglinkName, CRCHash, Opts.FallbackDebugPath, - DebugBinaryPath)) + if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) return nullptr; auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); if (!DbgObjOrErr) { @@ -458,7 +368,7 @@ ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, if (BuildID->size() < 2) return nullptr; std::string DebugBinaryPath; - if (!findDebugBinary(Opts.DebugFileDirectory, *BuildID, DebugBinaryPath)) + if (!findDebugBinary(*BuildID, DebugBinaryPath)) return nullptr; auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); if (!DbgObjOrErr) { @@ -468,6 +378,71 @@ ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, return DbgObjOrErr.get(); } +bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath, + const std::string &DebuglinkName, + uint32_t CRCHash, std::string &Result) { + SmallString<16> OrigDir(OrigPath); + llvm::sys::path::remove_filename(OrigDir); + SmallString<16> DebugPath = OrigDir; + // Try relative/path/to/original_binary/debuglink_name + llvm::sys::path::append(DebugPath, DebuglinkName); + if (checkFileCRC(DebugPath, CRCHash)) { + Result = std::string(DebugPath.str()); + return true; + } + // Try relative/path/to/original_binary/.debug/debuglink_name + DebugPath = OrigDir; + llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); + if (checkFileCRC(DebugPath, CRCHash)) { + Result = std::string(DebugPath.str()); + return true; + } + // Make the path absolute so that lookups will go to + // "/usr/lib/debug/full/path/to/debug", not + // "/usr/lib/debug/to/debug" + llvm::sys::fs::make_absolute(OrigDir); + if (!Opts.FallbackDebugPath.empty()) { + // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name + DebugPath = Opts.FallbackDebugPath; + } else { +#if defined(__NetBSD__) + // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name + DebugPath = "/usr/libdata/debug"; +#else + // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name + DebugPath = "/usr/lib/debug"; +#endif + } + llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), + DebuglinkName); + if (checkFileCRC(DebugPath, CRCHash)) { + Result = std::string(DebugPath.str()); + return true; + } + return false; +} + +bool LLVMSymbolizer::findDebugBinary(const ArrayRef<uint8_t> BuildID, + std::string &Result) { + Optional<std::string> Path; + Path = LocalDIFetcher(Opts.DebugFileDirectory).fetchBuildID(BuildID); + if (Path) { + Result = std::move(*Path); + return true; + } + + // Try caller-provided 
debug info fetchers. + for (const std::unique_ptr<DIFetcher> &Fetcher : DIFetchers) { + Path = Fetcher->fetchBuildID(BuildID); + if (Path) { + Result = std::move(*Path); + return true; + } + } + + return false; +} + Expected<LLVMSymbolizer::ObjectPair> LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, const std::string &ArchName) { diff --git a/contrib/llvm-project/llvm/lib/Debuginfod/DIFetcher.cpp b/contrib/llvm-project/llvm/lib/Debuginfod/DIFetcher.cpp new file mode 100644 index 000000000000..f0c134654534 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Debuginfod/DIFetcher.cpp @@ -0,0 +1,28 @@ +//===- llvm/DebugInfod/DIFetcher.cpp - Debug info fetcher -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines a DIFetcher implementation for obtaining debug info +/// from debuginfod. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Debuginfod/DIFetcher.h" + +#include "llvm/Debuginfod/Debuginfod.h" + +using namespace llvm; + +Optional<std::string> +DebuginfodDIFetcher::fetchBuildID(ArrayRef<uint8_t> BuildID) const { + Expected<std::string> PathOrErr = getCachedOrDownloadDebuginfo(BuildID); + if (PathOrErr) + return *PathOrErr; + consumeError(PathOrErr.takeError()); + return None; +} diff --git a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp index 2b3395b669b8..18de7dcd08f3 100644 --- a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp @@ -51,6 +51,11 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zfhmin", RISCVExtensionVersion{1, 0}}, {"zfh", RISCVExtensionVersion{1, 0}}, + {"zfinx", RISCVExtensionVersion{1, 0}}, + {"zdinx", RISCVExtensionVersion{1, 0}}, + {"zhinxmin", RISCVExtensionVersion{1, 0}}, + {"zhinx", RISCVExtensionVersion{1, 0}}, + {"zba", RISCVExtensionVersion{1, 0}}, {"zbb", RISCVExtensionVersion{1, 0}}, {"zbc", RISCVExtensionVersion{1, 0}}, @@ -686,6 +691,8 @@ Error RISCVISAInfo::checkDependency() { bool HasE = Exts.count("e") != 0; bool HasD = Exts.count("d") != 0; bool HasF = Exts.count("f") != 0; + bool HasZfinx = Exts.count("zfinx") != 0; + bool HasZdinx = Exts.count("zdinx") != 0; bool HasZve32x = Exts.count("zve32x") != 0; bool HasZve32f = Exts.count("zve32f") != 0; bool HasZve64d = Exts.count("zve64d") != 0; @@ -706,17 +713,15 @@ Error RISCVISAInfo::checkDependency() { return createStringError(errc::invalid_argument, "d requires f extension to also be specified"); - // FIXME: Consider Zfinx in the future - if (HasZve32f && !HasF) + if (HasZve32f && !HasF && !HasZfinx) return createStringError( errc::invalid_argument, - "zve32f requires f extension to also be specified"); + "zve32f requires f or zfinx extension to also be specified"); - // FIXME: Consider Zdinx in the future - if (HasZve64d && !HasD) + if (HasZve64d && !HasD && !HasZdinx) return createStringError( errc::invalid_argument, - "zve64d requires d extension to also be specified"); + "zve64d requires d or zdinx extension to also be specified"); if (HasZvl && !HasVector) return createStringError( @@ -733,6 +738,9 @@ Error RISCVISAInfo::checkDependency() { static const char *ImpliedExtsV[] = {"zvl128b", 
"f", "d"}; static const char *ImpliedExtsZfhmin[] = {"f"}; static const char *ImpliedExtsZfh[] = {"f"}; +static const char *ImpliedExtsZdinx[] = {"zfinx"}; +static const char *ImpliedExtsZhinxmin[] = {"zfinx"}; +static const char *ImpliedExtsZhinx[] = {"zfinx"}; static const char *ImpliedExtsZve64d[] = {"zve64f"}; static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"}; static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"}; @@ -767,8 +775,11 @@ struct ImpliedExtsEntry { // Note: The table needs to be sorted by name. static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"v"}, {ImpliedExtsV}}, + {{"zdinx"}, {ImpliedExtsZdinx}}, {{"zfh"}, {ImpliedExtsZfh}}, {{"zfhmin"}, {ImpliedExtsZfhmin}}, + {{"zhinx"}, {ImpliedExtsZhinx}}, + {{"zhinxmin"}, {ImpliedExtsZhinxmin}}, {{"zk"}, {ImpliedExtsZk}}, {{"zkn"}, {ImpliedExtsZkn}}, {{"zks"}, {ImpliedExtsZks}}, diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 1d162610de9c..2397a6d320a2 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1679,60 +1679,61 @@ let Predicates = [HasSVEorStreamingSVE] in { defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; - def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))), - (FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + //These patterns exist to improve the code quality of conversions on unpacked types. + def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))), + (FCVT_ZPmZ_HtoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; // FP_ROUND has an additional 'precise' flag which indicates the type of rounding. 
// This is ignored by the pattern below where it is matched by (i64 timm0_1) - def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))), - (FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))), + (FCVT_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - // Floating-point -> signed integer - def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), + // Signed integer -> Floating-point + def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))), - (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg), + def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 (SVEAllActive):$Pg), (sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))), - (SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), + def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))), - (SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (SCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), + def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))), - (SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (SCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg), + def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg), (sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))), - (SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (SCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - // Floating-point -> unsigned integer - def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), + // Unsigned integer -> Floating-point + def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), (and (nxv2i64 ZPR:$Zs), (nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))), - (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), + def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), (and (nxv2i64 ZPR:$Zs), (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))), - (UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (UCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg), + def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 (SVEAllActive):$Pg), (and (nxv4i32 ZPR:$Zs), (nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))), - (UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), + def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), (and (nxv2i64 ZPR:$Zs), (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))), - (UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (UCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; - def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg), + def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), (and (nxv2i64 ZPR:$Zs), (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))), - (UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; + (UCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; 
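The sext_inreg and and-mask patterns above all describe the same situation: a narrow integer carried in a wider unpacked vector lane, converted to floating point. In scalar terms (illustrative only), each lane computes:

```cpp
#include <cstdint>

double signedLane(int64_t Lane) {    // matches the (sext_inreg ...) forms
  int32_t W = (int32_t)Lane;         // only the low bits are meaningful
  return (double)W;                  // folds into one predicated scvtf
}

double unsignedLane(uint64_t Lane) { // matches the (and ..., 0xFFFFFFFF) forms
  uint32_t W = (uint32_t)Lane;       // zero-extend via mask
  return (double)W;                  // folds into one predicated ucvtf
}
```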
defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>; defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td index 9d4bdbe5d053..37b2ac4d8759 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -370,6 +370,14 @@ class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)), (inst $Op3, $Op1, $Op2)>; +multiclass SVE_1_Op_PassthruUndef_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg, + ValueType vts, Instruction inst>{ + def : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), (vtd undef))), + (inst (IMPLICIT_DEF), $Op1, $Op2)>; + def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (i64 timm0_1), vtd:$Op3)), + (inst $Op3, $Op1, $Op2)>; +} + class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty, ValueType it, ComplexPattern cpx, Instruction inst> : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))), @@ -2589,8 +2597,8 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm, SDPatternOperator int_op, SDPatternOperator ir_op, ValueType vt1, ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { - def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>; - + def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>, + SVEPseudo2Instr<NAME, 1>; // convert vt1 to a packed type for the intrinsic patterns defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16, !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16, @@ -2604,8 +2612,11 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm, 1 : vt3); def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>; - def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>; + + def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>; + + defm : SVE_1_Op_PassthruUndef_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>; } multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm, @@ -2614,7 +2625,8 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm, SDPatternOperator int_op, SDPatternOperator ir_op, ValueType vt1, ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { - def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>; + def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>, + SVEPseudo2Instr<NAME, 1>; // convert vt1 to a packed type for the intrinsic patterns defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16, @@ -2623,8 +2635,11 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm, 1 : vt1); def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>; - def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>; + + def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>; + + defm : SVE_1_Op_PassthruUndef_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>; } multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 01f36e6dcdd2..95319d1b0b74 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ 
b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -170,6 +170,7 @@ class RISCVAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseVTypeI(OperandVector &Operands); OperandMatchResultTy parseMaskReg(OperandVector &Operands); OperandMatchResultTy parseInsnDirectiveOpcode(OperandVector &Operands); + OperandMatchResultTy parseGPRAsFPR(OperandVector &Operands); bool parseOperand(OperandVector &Operands, StringRef Mnemonic); @@ -273,6 +274,8 @@ struct RISCVOperand : public MCParsedAsmOperand { bool IsRV64; + bool IsGPRAsFPR; + struct RegOp { MCRegister RegNum; }; @@ -343,6 +346,14 @@ public: RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum); } + bool isGPRAsFPR() const { return isGPR() && IsGPRAsFPR; } + + bool isGPRF64AsFPR() const { return isGPR() && IsGPRAsFPR && IsRV64; } + + bool isGPRPF64AsFPR() const { + return isGPR() && IsGPRAsFPR && !IsRV64 && !((Reg.RegNum - RISCV::X0) & 1); + } + static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm, RISCVMCExpr::VariantKind &VK) { if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) { @@ -831,12 +842,14 @@ public: } static std::unique_ptr<RISCVOperand> createReg(unsigned RegNo, SMLoc S, - SMLoc E, bool IsRV64) { + SMLoc E, bool IsRV64, + bool IsGPRAsFPR = false) { auto Op = std::make_unique<RISCVOperand>(KindTy::Register); Op->Reg.RegNum = RegNo; Op->StartLoc = S; Op->EndLoc = E; Op->IsRV64 = IsRV64; + Op->IsGPRAsFPR = IsGPRAsFPR; return Op; } @@ -1780,6 +1793,26 @@ OperandMatchResultTy RISCVAsmParser::parseMaskReg(OperandVector &Operands) { return MatchOperand_Success; } +OperandMatchResultTy RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) { + switch (getLexer().getKind()) { + default: + return MatchOperand_NoMatch; + case AsmToken::Identifier: + StringRef Name = getLexer().getTok().getIdentifier(); + MCRegister RegNo; + matchRegisterNameHelper(isRV32E(), RegNo, Name); + + if (RegNo == RISCV::NoRegister) + return MatchOperand_NoMatch; + SMLoc S = getLoc(); + SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); + getLexer().Lex(); + Operands.push_back(RISCVOperand::createReg( + RegNo, S, E, isRV64(), !getSTI().hasFeature(RISCV::FeatureStdExtF))); + } + return MatchOperand_Success; +} + OperandMatchResultTy RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) { if (getLexer().isNot(AsmToken::LParen)) { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index ff96b2b254ca..18947997dc58 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -161,6 +161,17 @@ static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, return MCDisassembler::Success; } +static DecodeStatus DecodeGPRPF64RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo >= 32 || RegNo & 1) + return MCDisassembler::Fail; + + MCRegister Reg = RISCV::X0 + RegNo; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { @@ -427,6 +438,27 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return MCDisassembler::Fail; } Insn = support::endian::read32le(Bytes.data()); + if (STI.getFeatureBits()[RISCV::FeatureStdExtZdinx] && + 
!STI.getFeatureBits()[RISCV::Feature64Bit]) {
+      LLVM_DEBUG(dbgs() << "Trying RV32Zdinx table (Double in Integer and "
+                           "rv32)\n");
+      Result = decodeInstruction(DecoderTableRV32Zdinx32, MI, Insn, Address,
+                                 this, STI);
+      if (Result != MCDisassembler::Fail) {
+        Size = 4;
+        return Result;
+      }
+    }
+
+    if (STI.getFeatureBits()[RISCV::FeatureStdExtZfinx]) {
+      LLVM_DEBUG(dbgs() << "Trying RVZfinx table (Float in Integer):\n");
+      Result = decodeInstruction(DecoderTableRVZfinx32, MI, Insn, Address, this,
+                                 STI);
+      if (Result != MCDisassembler::Fail) {
+        Size = 4;
+        return Result;
+      }
+    }
     LLVM_DEBUG(dbgs() << "Trying RISCV32 table :\n");
     Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
     Size = 4;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
index e32a8fb010de..065e731ff6bc 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td
@@ -63,6 +63,43 @@ def HasStdExtZfhOrZfhmin
                            "'Zfh' (Half-Precision Floating-Point) or "
                            "'Zfhmin' (Half-Precision Floating-Point Minimal)">;
 
+def FeatureStdExtZfinx
+    : SubtargetFeature<"zfinx", "HasStdExtZfinx", "true",
+                       "'Zfinx' (Float in Integer)">;
+def HasStdExtZfinx : Predicate<"Subtarget->hasStdExtZfinx()">,
+                     AssemblerPredicate<(all_of FeatureStdExtZfinx),
+                                        "'Zfinx' (Float in Integer)">;
+
+def FeatureStdExtZdinx
+    : SubtargetFeature<"zdinx", "HasStdExtZdinx", "true",
+                       "'Zdinx' (Double in Integer)",
+                       [FeatureStdExtZfinx]>;
+def HasStdExtZdinx : Predicate<"Subtarget->hasStdExtZdinx()">,
+                     AssemblerPredicate<(all_of FeatureStdExtZdinx),
+                                        "'Zdinx' (Double in Integer)">;
+
+def FeatureStdExtZhinxmin
+    : SubtargetFeature<"zhinxmin", "HasStdExtZhinxmin", "true",
+                       "'Zhinxmin' (Half Float in Integer Minimal)",
+                       [FeatureStdExtZfinx]>;
+def HasStdExtZhinxmin : Predicate<"Subtarget->hasStdExtZhinxmin()">,
+                        AssemblerPredicate<(all_of FeatureStdExtZhinxmin),
+                                           "'Zhinxmin' (Half Float in Integer Minimal)">;
+
+def FeatureStdExtZhinx
+    : SubtargetFeature<"zhinx", "HasStdExtZhinx", "true",
+                       "'Zhinx' (Half Float in Integer)",
+                       [FeatureStdExtZfinx]>;
+def HasStdExtZhinx : Predicate<"Subtarget->hasStdExtZhinx()">,
+                     AssemblerPredicate<(all_of FeatureStdExtZhinx),
+                                        "'Zhinx' (Half Float in Integer)">;
+
+def HasStdExtZhinxOrZhinxmin
+    : Predicate<"Subtarget->hasStdExtZhinx() || Subtarget->hasStdExtZhinxmin()">,
+      AssemblerPredicate<(any_of FeatureStdExtZhinx, FeatureStdExtZhinxmin),
+                         "'Zhinx' (Half Float in Integer) or "
+                         "'Zhinxmin' (Half Float in Integer Minimal)">;
+
 def FeatureStdExtC
     : SubtargetFeature<"c", "HasStdExtC", "true",
                        "'C' (Compressed Instructions)">;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index ad003404d793..f3cc7d3fb46f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1116,14 +1116,6 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
   return true;
 }
 
-bool RISCVFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
-  // Keep the conventional code flow when not optimizing.
- if (MF.getFunction().hasOptNone()) - return false; - - return true; -} - bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); const MachineFunction *MF = MBB.getParent(); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h index 1e94e34acf2f..bc3ace786272 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -65,8 +65,6 @@ public: bool canUseAsPrologue(const MachineBasicBlock &MBB) const override; bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; - bool enableShrinkWrapping(const MachineFunction &MF) const override; - bool isSupportedStackID(TargetStackID::Value ID) const override; TargetStackID::Value getStackIDForScalableVectors() const override; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 649eb57b325b..6c4d2682bcd8 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -334,6 +334,10 @@ public: return false; } + bool operator!=(const VSETVLIInfo &Other) const { + return !(*this == Other); + } + // Calculate the VSETVLIInfo visible to a block assuming this and Other are // both predecessors. VSETVLIInfo intersect(const VSETVLIInfo &Other) const { @@ -999,12 +1003,6 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require, void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { VSETVLIInfo CurInfo; - // BBLocalInfo tracks the VL/VTYPE state the same way BBInfo.Change was - // calculated in computeIncomingVLVTYPE. We need this to apply - // canSkipVSETVLIForLoadStore the same way computeIncomingVLVTYPE did. We - // can't include predecessor information in that decision to avoid disagreeing - // with the global analysis. - VSETVLIInfo BBLocalInfo; // Only be set if current VSETVLIInfo is from an explicit VSET(I)VLI. MachineInstr *PrevVSETVLIMI = nullptr; @@ -1020,7 +1018,6 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { MI.getOperand(3).setIsDead(false); MI.getOperand(4).setIsDead(false); CurInfo = getInfoForVSETVLI(MI); - BBLocalInfo = getInfoForVSETVLI(MI); PrevVSETVLIMI = &MI; continue; } @@ -1050,22 +1047,12 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { // use the predecessor information. assert(BlockInfo[MBB.getNumber()].Pred.isValid() && "Expected a valid predecessor state."); - // Don't use predecessor information if there was an earlier instruction - // in this block that allowed a vsetvli to be skipped for load/store. - if (!(BBLocalInfo.isValid() && - canSkipVSETVLIForLoadStore(MI, NewInfo, BBLocalInfo)) && - needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) && + if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) && needVSETVLIPHI(NewInfo, MBB)) { insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred); CurInfo = NewInfo; - BBLocalInfo = NewInfo; } - - // We must update BBLocalInfo for every vector instruction. - if (!BBLocalInfo.isValid()) - BBLocalInfo = NewInfo; } else { - assert(BBLocalInfo.isValid()); // If this instruction isn't compatible with the previous VL/VTYPE // we need to insert a VSETVLI. 
// If this is a unit-stride or strided load/store, we may be able to use @@ -1101,7 +1088,6 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { if (NeedInsertVSETVLI) insertVSETVLI(MBB, MI, NewInfo, CurInfo); CurInfo = NewInfo; - BBLocalInfo = NewInfo; } } PrevVSETVLIMI = nullptr; @@ -1112,9 +1098,19 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) || MI.modifiesRegister(RISCV::VTYPE)) { CurInfo = VSETVLIInfo::getUnknown(); - BBLocalInfo = VSETVLIInfo::getUnknown(); PrevVSETVLIMI = nullptr; } + + // If we reach the end of the block and our current info doesn't match the + // expected info, insert a vsetvli to correct. + if (MI.isTerminator()) { + const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit; + if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() && + CurInfo != ExitInfo) { + insertVSETVLI(MBB, MI, ExitInfo, CurInfo); + CurInfo = ExitInfo; + } + } } } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 2837b92da81f..4f5ec6aada61 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -26,6 +26,69 @@ def RISCVBuildPairF64 : SDNode<"RISCVISD::BuildPairF64", SDT_RISCVBuildPairF64>; def RISCVSplitF64 : SDNode<"RISCVISD::SplitF64", SDT_RISCVSplitF64>; //===----------------------------------------------------------------------===// +// Operand and SDNode transformation definitions. +//===----------------------------------------------------------------------===// + +// Zdinx + +def GPRPF64AsFPR : AsmOperandClass { + let Name = "GPRPF64AsFPR"; + let ParserMethod = "parseGPRAsFPR"; + let RenderMethod = "addRegOperands"; +} + +def GPRF64AsFPR : AsmOperandClass { + let Name = "GPRF64AsFPR"; + let ParserMethod = "parseGPRAsFPR"; + let RenderMethod = "addRegOperands"; +} + +def FPR64INX : RegisterOperand<GPRF64> { + let ParserMatchClass = GPRF64AsFPR; + let DecoderMethod = "DecodeGPRRegisterClass"; +} + +def FPR64IN32X : RegisterOperand<GPRPF64> { + let ParserMatchClass = GPRPF64AsFPR; +} + +def DExt : ExtInfo<0, [HasStdExtD]>; +def D64Ext : ExtInfo<0, [HasStdExtD, IsRV64]>; +def ZdinxExt : ExtInfo<1, [HasStdExtZdinx, IsRV64]>; +def Zdinx32Ext : ExtInfo<2, [HasStdExtZdinx, IsRV32]>; + +def D : ExtInfo_r<DExt, FPR64>; +def D_INX : ExtInfo_r<ZdinxExt, FPR64INX>; +def D_IN32X : ExtInfo_r<Zdinx32Ext, FPR64IN32X>; + +def DD : ExtInfo_rr<DExt, FPR64, FPR64>; +def DD_INX : ExtInfo_rr<ZdinxExt, FPR64INX, FPR64INX>; +def DD_IN32X : ExtInfo_rr<Zdinx32Ext, FPR64IN32X, FPR64IN32X>; +def DF : ExtInfo_rr<DExt, FPR64, FPR32>; +def DF_INX : ExtInfo_rr<ZdinxExt, FPR64INX, FPR32INX>; +def DF_IN32X : ExtInfo_rr<Zdinx32Ext, FPR64IN32X, FPR32INX>; +def DX : ExtInfo_rr<DExt, FPR64, GPR>; +def DX_INX : ExtInfo_rr<ZdinxExt, FPR64INX, GPR>; +def DX_IN32X : ExtInfo_rr<Zdinx32Ext, FPR64IN32X, GPR>; +def DX_64 : ExtInfo_rr<D64Ext, FPR64, GPR>; +def FD : ExtInfo_rr<DExt, FPR32, FPR64>; +def FD_INX : ExtInfo_rr<ZdinxExt, FPR32INX, FPR64INX>; +def FD_IN32X : ExtInfo_rr<Zdinx32Ext, FPR32INX, FPR64IN32X>; +def XD : ExtInfo_rr<DExt, GPR, FPR64>; +def XD_INX : ExtInfo_rr<ZdinxExt, GPR, FPR64INX>; +def XD_IN32X : ExtInfo_rr<Zdinx32Ext, GPR, FPR64IN32X>; +def XD_64 : ExtInfo_rr<D64Ext, GPR, FPR64>; + +defvar DINX = [D, D_INX, D_IN32X]; +defvar DDINX = [DD, DD_INX, DD_IN32X]; +defvar DXINX = [DX, DX_INX, DX_IN32X]; +defvar 
DFINX = [DF, DF_INX, DF_IN32X]; +defvar FDINX = [FD, FD_INX, FD_IN32X]; +defvar XDINX = [XD, XD_INX, XD_IN32X]; +defvar DXIN64X = [DX_64, DX_INX]; +defvar XDIN64X = [XD_64, XD_INX]; + +//===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -36,106 +99,104 @@ def FLD : FPLoad_r<0b011, "fld", FPR64, WriteFLD64>; // reflecting the order these fields are specified in the instruction // encoding. def FSD : FPStore_r<0b011, "fsd", FPR64, WriteFST64>; +} // Predicates = [HasStdExtD] let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in { -def FMADD_D : FPFMA_rrr_frm<OPC_MADD, 0b01, "fmadd.d", FPR64>; -def FMSUB_D : FPFMA_rrr_frm<OPC_MSUB, 0b01, "fmsub.d", FPR64>; -def FNMSUB_D : FPFMA_rrr_frm<OPC_NMSUB, 0b01, "fnmsub.d", FPR64>; -def FNMADD_D : FPFMA_rrr_frm<OPC_NMADD, 0b01, "fnmadd.d", FPR64>; +defm FMADD_D : FPFMA_rrr_frm_m<OPC_MADD, 0b01, "fmadd.d", DINX>; +defm FMSUB_D : FPFMA_rrr_frm_m<OPC_MSUB, 0b01, "fmsub.d", DINX>; +defm FNMSUB_D : FPFMA_rrr_frm_m<OPC_NMSUB, 0b01, "fnmsub.d", DINX>; +defm FNMADD_D : FPFMA_rrr_frm_m<OPC_NMADD, 0b01, "fnmadd.d", DINX>; +} + +defm : FPFMADynFrmAlias_m<FMADD_D, "fmadd.d", DINX>; +defm : FPFMADynFrmAlias_m<FMSUB_D, "fmsub.d", DINX>; +defm : FPFMADynFrmAlias_m<FNMSUB_D, "fnmsub.d", DINX>; +defm : FPFMADynFrmAlias_m<FNMADD_D, "fnmadd.d", DINX>; + +let SchedRW = [WriteFALU64, ReadFALU64, ReadFALU64] in { +defm FADD_D : FPALU_rr_frm_m<0b0000001, "fadd.d", DINX>; +defm FSUB_D : FPALU_rr_frm_m<0b0000101, "fsub.d", DINX>; } +let SchedRW = [WriteFMul64, ReadFMul64, ReadFMul64] in +defm FMUL_D : FPALU_rr_frm_m<0b0001001, "fmul.d", DINX>; -def : FPFMADynFrmAlias<FMADD_D, "fmadd.d", FPR64>; -def : FPFMADynFrmAlias<FMSUB_D, "fmsub.d", FPR64>; -def : FPFMADynFrmAlias<FNMSUB_D, "fnmsub.d", FPR64>; -def : FPFMADynFrmAlias<FNMADD_D, "fnmadd.d", FPR64>; - -def FADD_D : FPALU_rr_frm<0b0000001, "fadd.d", FPR64>, - Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>; -def FSUB_D : FPALU_rr_frm<0b0000101, "fsub.d", FPR64>, - Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>; -def FMUL_D : FPALU_rr_frm<0b0001001, "fmul.d", FPR64>, - Sched<[WriteFMul64, ReadFMul64, ReadFMul64]>; -def FDIV_D : FPALU_rr_frm<0b0001101, "fdiv.d", FPR64>, - Sched<[WriteFDiv64, ReadFDiv64, ReadFDiv64]>; - -def : FPALUDynFrmAlias<FADD_D, "fadd.d", FPR64>; -def : FPALUDynFrmAlias<FSUB_D, "fsub.d", FPR64>; -def : FPALUDynFrmAlias<FMUL_D, "fmul.d", FPR64>; -def : FPALUDynFrmAlias<FDIV_D, "fdiv.d", FPR64>; - -def FSQRT_D : FPUnaryOp_r_frm<0b0101101, 0b00000, FPR64, FPR64, "fsqrt.d">, - Sched<[WriteFSqrt64, ReadFSqrt64]>; -def : FPUnaryOpDynFrmAlias<FSQRT_D, "fsqrt.d", FPR64, FPR64>; +let SchedRW = [WriteFDiv64, ReadFDiv64, ReadFDiv64] in +defm FDIV_D : FPALU_rr_frm_m<0b0001101, "fdiv.d", DINX>; + +defm : FPALUDynFrmAlias_m<FADD_D, "fadd.d", DINX>; +defm : FPALUDynFrmAlias_m<FSUB_D, "fsub.d", DINX>; +defm : FPALUDynFrmAlias_m<FMUL_D, "fmul.d", DINX>; +defm : FPALUDynFrmAlias_m<FDIV_D, "fdiv.d", DINX>; + +defm FSQRT_D : FPUnaryOp_r_frm_m<0b0101101, 0b00000, DDINX, "fsqrt.d">, + Sched<[WriteFSqrt64, ReadFSqrt64]>; +defm : FPUnaryOpDynFrmAlias_m<FSQRT_D, "fsqrt.d", DDINX>; let SchedRW = [WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64], mayRaiseFPException = 0 in { -def FSGNJ_D : FPALU_rr<0b0010001, 0b000, "fsgnj.d", FPR64>; -def FSGNJN_D : FPALU_rr<0b0010001, 0b001, "fsgnjn.d", FPR64>; -def FSGNJX_D : FPALU_rr<0b0010001, 0b010, "fsgnjx.d", FPR64>; +defm FSGNJ_D : FPALU_rr_m<0b0010001, 0b000, 
"fsgnj.d", DINX>; +defm FSGNJN_D : FPALU_rr_m<0b0010001, 0b001, "fsgnjn.d", DINX>; +defm FSGNJX_D : FPALU_rr_m<0b0010001, 0b010, "fsgnjx.d", DINX>; } let SchedRW = [WriteFMinMax64, ReadFMinMax64, ReadFMinMax64] in { -def FMIN_D : FPALU_rr<0b0010101, 0b000, "fmin.d", FPR64>; -def FMAX_D : FPALU_rr<0b0010101, 0b001, "fmax.d", FPR64>; +defm FMIN_D : FPALU_rr_m<0b0010101, 0b000, "fmin.d", DINX>; +defm FMAX_D : FPALU_rr_m<0b0010101, 0b001, "fmax.d", DINX>; } -def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, 0b00001, FPR32, FPR64, "fcvt.s.d">, - Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>; -def : FPUnaryOpDynFrmAlias<FCVT_S_D, "fcvt.s.d", FPR32, FPR64>; +defm FCVT_S_D : FPUnaryOp_r_frm_m<0b0100000, 0b00001, FDINX, "fcvt.s.d">, + Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_S_D, "fcvt.s.d", FDINX>; -def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b00000, 0b000, FPR64, FPR32, "fcvt.d.s">, - Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>; +defm FCVT_D_S : FPUnaryOp_r_m<0b0100001, 0b00000, 0b000, DFINX, "fcvt.d.s">, + Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>; let SchedRW = [WriteFCmp64, ReadFCmp64, ReadFCmp64] in { -def FEQ_D : FPCmp_rr<0b1010001, 0b010, "feq.d", FPR64>; -def FLT_D : FPCmp_rr<0b1010001, 0b001, "flt.d", FPR64>; -def FLE_D : FPCmp_rr<0b1010001, 0b000, "fle.d", FPR64>; +defm FEQ_D : FPCmp_rr_m<0b1010001, 0b010, "feq.d", DINX>; +defm FLT_D : FPCmp_rr_m<0b1010001, 0b001, "flt.d", DINX>; +defm FLE_D : FPCmp_rr_m<0b1010001, 0b000, "fle.d", DINX>; } -let mayRaiseFPException = 0 in -def FCLASS_D : FPUnaryOp_r<0b1110001, 0b00000, 0b001, GPR, FPR64, "fclass.d">, - Sched<[WriteFClass64, ReadFClass64]>; +defm FCLASS_D : FPUnaryOp_r_m<0b1110001, 0b00000, 0b001, XDINX, "fclass.d">, + Sched<[WriteFClass64, ReadFClass64]>; -def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, 0b00000, GPR, FPR64, "fcvt.w.d">, +defm FCVT_W_D : FPUnaryOp_r_frm_m<0b1100001, 0b00000, XDINX, "fcvt.w.d">, Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>; -def : FPUnaryOpDynFrmAlias<FCVT_W_D, "fcvt.w.d", GPR, FPR64>; - -def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, 0b00001, GPR, FPR64, "fcvt.wu.d">, - Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>; -def : FPUnaryOpDynFrmAlias<FCVT_WU_D, "fcvt.wu.d", GPR, FPR64>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_W_D, "fcvt.w.d", XDINX>; -def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b00000, 0b000, FPR64, GPR, "fcvt.d.w">, - Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>; +defm FCVT_WU_D : FPUnaryOp_r_frm_m<0b1100001, 0b00001, XDINX, "fcvt.wu.d">, + Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_WU_D, "fcvt.wu.d", XDINX>; -def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b00001, 0b000, FPR64, GPR, "fcvt.d.wu">, +defm FCVT_D_W : FPUnaryOp_r_m<0b1101001, 0b00000, 0b000, DXINX, "fcvt.d.w">, Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>; -} // Predicates = [HasStdExtD] -let Predicates = [HasStdExtD, IsRV64] in { -def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, 0b00010, GPR, FPR64, "fcvt.l.d">, - Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>; -def : FPUnaryOpDynFrmAlias<FCVT_L_D, "fcvt.l.d", GPR, FPR64>; +defm FCVT_D_WU : FPUnaryOp_r_m<0b1101001, 0b00001, 0b000, DXINX, "fcvt.d.wu">, + Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>; -def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, 0b00011, GPR, FPR64, "fcvt.lu.d">, +defm FCVT_L_D : FPUnaryOp_r_frm_m<0b1100001, 0b00010, XDIN64X, "fcvt.l.d">, Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>; -def : FPUnaryOpDynFrmAlias<FCVT_LU_D, "fcvt.lu.d", GPR, FPR64>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_L_D, "fcvt.l.d", XDIN64X>; -let mayRaiseFPException = 
0 in +defm FCVT_LU_D : FPUnaryOp_r_frm_m<0b1100001, 0b00011, XDIN64X, "fcvt.lu.d">, + Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_LU_D, "fcvt.lu.d", XDIN64X>; + +let Predicates = [HasStdExtD, IsRV64], mayRaiseFPException = 0 in def FMV_X_D : FPUnaryOp_r<0b1110001, 0b00000, 0b000, GPR, FPR64, "fmv.x.d">, Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]>; -def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, 0b00010, FPR64, GPR, "fcvt.d.l">, - Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>; -def : FPUnaryOpDynFrmAlias<FCVT_D_L, "fcvt.d.l", FPR64, GPR>; - -def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, 0b00011, FPR64, GPR, "fcvt.d.lu">, +defm FCVT_D_L : FPUnaryOp_r_frm_m<0b1101001, 0b00010, DXIN64X, "fcvt.d.l">, Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>; -def : FPUnaryOpDynFrmAlias<FCVT_D_LU, "fcvt.d.lu", FPR64, GPR>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_D_L, "fcvt.d.l", DXIN64X>; -let mayRaiseFPException = 0 in +defm FCVT_D_LU : FPUnaryOp_r_frm_m<0b1101001, 0b00011, DXIN64X, "fcvt.d.lu">, + Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_D_LU, "fcvt.d.lu", DXIN64X>; + +let Predicates = [HasStdExtD, IsRV64], mayRaiseFPException = 0 in def FMV_D_X : FPUnaryOp_r<0b1111001, 0b00000, 0b000, FPR64, GPR, "fmv.d.x">, Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]>; -} // Predicates = [HasStdExtD, IsRV64] //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) @@ -164,6 +225,26 @@ def PseudoQuietFLT_D : PseudoQuietFCMP<FPR64>; } } // Predicates = [HasStdExtD] +let Predicates = [HasStdExtZdinx, IsRV64] in { +def : InstAlias<"fabs.d $rd, $rs", (FSGNJX_D_INX FPR64INX:$rd, FPR64INX:$rs, FPR64INX:$rs)>; +def : InstAlias<"fneg.d $rd, $rs", (FSGNJN_D_INX FPR64INX:$rd, FPR64INX:$rs, FPR64INX:$rs)>; + +def : InstAlias<"fgt.d $rd, $rs, $rt", + (FLT_D_INX GPR:$rd, FPR64INX:$rt, FPR64INX:$rs), 0>; +def : InstAlias<"fge.d $rd, $rs, $rt", + (FLE_D_INX GPR:$rd, FPR64INX:$rt, FPR64INX:$rs), 0>; +} // Predicates = [HasStdExtZdinx, IsRV64] + +let Predicates = [HasStdExtZdinx, IsRV32] in { +def : InstAlias<"fabs.d $rd, $rs", (FSGNJX_D_IN32X FPR64IN32X:$rd, FPR64IN32X:$rs, FPR64IN32X:$rs)>; +def : InstAlias<"fneg.d $rd, $rs", (FSGNJN_D_IN32X FPR64IN32X:$rd, FPR64IN32X:$rs, FPR64IN32X:$rs)>; + +def : InstAlias<"fgt.d $rd, $rs, $rt", + (FLT_D_IN32X GPR:$rd, FPR64IN32X:$rt, FPR64IN32X:$rs), 0>; +def : InstAlias<"fge.d $rd, $rs, $rt", + (FLE_D_IN32X GPR:$rd, FPR64IN32X:$rt, FPR64IN32X:$rs), 0>; +} // Predicates = [HasStdExtZdinx, IsRV32] + //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index a8ac06ba8da3..4b45b47af451 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -57,6 +57,73 @@ def riscv_any_fcvt_wu_rv64 : PatFrags<(ops node:$src, node:$frm), // Operand and SDNode transformation definitions. 
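The fgt.d/fge.d aliases above have no encodings of their own: the assembler rewrites them as flt.d/fle.d with the source operands swapped. A minimal C++ sketch of why that rewrite is exact for these ordered comparisons (the helper names are illustrative, not the actual alias-expansion machinery):

#include <cassert>

// flt/fle are the only ordered "<"-family comparisons the ISA encodes.
bool flt(double a, double b) { return a < b; }   // stands in for flt.d
bool fle(double a, double b) { return a <= b; }  // stands in for fle.d

// fgt.d rd, rs, rt  ->  flt.d rd, rt, rs : x > y holds exactly when y < x.
bool fgt(double rs, double rt) { return flt(rt, rs); }
// fge.d rd, rs, rt  ->  fle.d rd, rt, rs : x >= y holds exactly when y <= x.
bool fge(double rs, double rt) { return fle(rt, rs); }

int main() {
  assert(fgt(2.0, 1.0) && !fgt(1.0, 2.0) && !fgt(1.0, 1.0));
  assert(fge(1.0, 1.0) && fge(2.0, 1.0) && !fge(1.0, 2.0));
  return 0;
}

For NaN inputs both sides of each identity are false, so the swap is also safe for unordered operands.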
//===----------------------------------------------------------------------===// +// Zfinx + +def GPRAsFPR : AsmOperandClass { + let Name = "GPRAsFPR"; + let ParserMethod = "parseGPRAsFPR"; + let RenderMethod = "addRegOperands"; +} + +def FPR32INX : RegisterOperand<GPRF32> { + let ParserMatchClass = GPRAsFPR; + let DecoderMethod = "DecodeGPRRegisterClass"; +} + +// inx = 0 : f, d, zfh, zfhmin +// = 1 : zfinx, zdinx, zhinx, zhinxmin +// = 2 : zdinx_rv32 +class ExtInfo<bits<2> inx, list<Predicate> pres> { + string Suffix = !cond(!eq(inx, 0): "", + !eq(inx, 1): "_INX", + !eq(inx, 2): "_IN32X"); + list<Predicate> Predicates = pres; + string Space = !cond(!eq(inx, 0): "", + !eq(inx, 1): "RVZfinx", + !eq(inx, 2): "RV32Zdinx"); +} + +class ExtInfo_r<ExtInfo ext, DAGOperand reg> { + string Suffix = ext.Suffix; + list<Predicate> Predicates = ext.Predicates; + string Space = ext.Space; + DAGOperand Reg = reg; +} + +class ExtInfo_rr<ExtInfo ext, DAGOperand rdty, DAGOperand rs1ty> { + string Suffix = ext.Suffix; + list<Predicate> Predicates = ext.Predicates; + string Space = ext.Space; + DAGOperand RdTy = rdty; + DAGOperand Rs1Ty = rs1ty; +} + +def FExt : ExtInfo<0, [HasStdExtF]>; +def F64Ext : ExtInfo<0, [HasStdExtF, IsRV64]>; +def ZfinxExt : ExtInfo<1, [HasStdExtZfinx]>; +def Zfinx64Ext : ExtInfo<1, [HasStdExtZfinx, IsRV64]>; + +def F : ExtInfo_r<FExt, FPR32>; +def F_INX : ExtInfo_r<ZfinxExt, FPR32INX>; + +def FF : ExtInfo_rr<FExt, FPR32, FPR32>; +def FF_INX : ExtInfo_rr<ZfinxExt, FPR32INX, FPR32INX>; +def FX : ExtInfo_rr<FExt, FPR32, GPR>; +def FX_INX : ExtInfo_rr<ZfinxExt, FPR32INX, GPR>; +def FX_64 : ExtInfo_rr<F64Ext, FPR32, GPR>; +def FX_INX_64 : ExtInfo_rr<Zfinx64Ext, FPR32INX, GPR>; +def XF : ExtInfo_rr<FExt, GPR, FPR32>; +def XF_64 : ExtInfo_rr<F64Ext, GPR, FPR32>; +def XF_INX : ExtInfo_rr<ZfinxExt, GPR, FPR32INX>; +def XF_INX_64 : ExtInfo_rr<Zfinx64Ext, GPR, FPR32INX>; + +defvar FINX = [F, F_INX]; +defvar FFINX = [FF, FF_INX]; +defvar FXINX = [FX, FX_INX]; +defvar XFINX = [XF, XF_INX]; +defvar XFIN64X = [XF_64, XF_INX_64]; +defvar FXIN64X = [FX_64, FX_INX_64]; + // Floating-point rounding mode def FRMArg : AsmOperandClass { @@ -94,62 +161,123 @@ class FPStore_r<bits<3> funct3, string opcodestr, RegisterClass rty, let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, UseNamedOperandTable = 1, hasPostISelHook = 1 in class FPFMA_rrr_frm<RISCVOpcode opcode, bits<2> funct2, string opcodestr, - RegisterClass rty> + DAGOperand rty> : RVInstR4Frm<funct2, opcode, (outs rty:$rd), (ins rty:$rs1, rty:$rs2, rty:$rs3, frmarg:$frm), opcodestr, "$rd, $rs1, $rs2, $rs3, $frm">; +multiclass FPFMA_rrr_frm_m<RISCVOpcode opcode, bits<2> funct2, + string opcodestr, list<ExtInfo_r> Exts> { + foreach Ext = Exts in + let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in + def Ext.Suffix : FPFMA_rrr_frm<opcode, funct2, opcodestr, Ext.Reg>; +} + class FPFMADynFrmAlias<FPFMA_rrr_frm Inst, string OpcodeStr, - RegisterClass rty> + DAGOperand rty> : InstAlias<OpcodeStr#" $rd, $rs1, $rs2, $rs3", (Inst rty:$rd, rty:$rs1, rty:$rs2, rty:$rs3, 0b111)>; +multiclass FPFMADynFrmAlias_m<FPFMA_rrr_frm Inst, string OpcodeStr, + list<ExtInfo_r> Exts> { + foreach Ext = Exts in + let Predicates = Ext.Predicates in + def : FPFMADynFrmAlias<!cast<FPFMA_rrr_frm>(Inst#Ext.Suffix), OpcodeStr, + Ext.Reg>; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in class FPALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr, - RegisterClass rty> + DAGOperand rty> : RVInstR<funct7, 
funct3, OPC_OP_FP, (outs rty:$rd), (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2">; +multiclass FPALU_rr_m<bits<7> funct7, bits<3> funct3, string opcodestr, + list<ExtInfo_r> Exts> { + foreach Ext = Exts in + let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in + def Ext.Suffix : FPALU_rr<funct7, funct3, opcodestr, Ext.Reg>; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, UseNamedOperandTable = 1, hasPostISelHook = 1 in -class FPALU_rr_frm<bits<7> funct7, string opcodestr, RegisterClass rty> +class FPALU_rr_frm<bits<7> funct7, string opcodestr, DAGOperand rty> : RVInstRFrm<funct7, OPC_OP_FP, (outs rty:$rd), (ins rty:$rs1, rty:$rs2, frmarg:$frm), opcodestr, "$rd, $rs1, $rs2, $frm">; +multiclass FPALU_rr_frm_m<bits<7> funct7, string opcodestr, + list<ExtInfo_r> Exts> { + foreach Ext = Exts in + let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in + def Ext.Suffix : FPALU_rr_frm<funct7, opcodestr, Ext.Reg>; +} + class FPALUDynFrmAlias<FPALU_rr_frm Inst, string OpcodeStr, - RegisterClass rty> + DAGOperand rty> : InstAlias<OpcodeStr#" $rd, $rs1, $rs2", (Inst rty:$rd, rty:$rs1, rty:$rs2, 0b111)>; +multiclass FPALUDynFrmAlias_m<FPALU_rr_frm Inst, string OpcodeStr, + list<ExtInfo_r> Exts> { + foreach Ext = Exts in + let Predicates = Ext.Predicates in + def : FPALUDynFrmAlias<!cast<FPALU_rr_frm>(Inst#Ext.Suffix), OpcodeStr, + Ext.Reg>; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in class FPUnaryOp_r<bits<7> funct7, bits<5> rs2val, bits<3> funct3, - RegisterClass rdty, RegisterClass rs1ty, string opcodestr> + DAGOperand rdty, DAGOperand rs1ty, string opcodestr> : RVInstR<funct7, funct3, OPC_OP_FP, (outs rdty:$rd), (ins rs1ty:$rs1), opcodestr, "$rd, $rs1"> { let rs2 = rs2val; } +multiclass FPUnaryOp_r_m<bits<7> funct7, bits<5> rs2val, bits<3> funct3, + list<ExtInfo_rr> Exts, string opcodestr> { + foreach Ext = Exts in + let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in + def Ext.Suffix : FPUnaryOp_r<funct7, rs2val, funct3, Ext.RdTy, Ext.Rs1Ty, + opcodestr>; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, UseNamedOperandTable = 1, hasPostISelHook = 1 in -class FPUnaryOp_r_frm<bits<7> funct7, bits<5> rs2val, RegisterClass rdty, - RegisterClass rs1ty, string opcodestr> +class FPUnaryOp_r_frm<bits<7> funct7, bits<5> rs2val, DAGOperand rdty, + DAGOperand rs1ty, string opcodestr> : RVInstRFrm<funct7, OPC_OP_FP, (outs rdty:$rd), (ins rs1ty:$rs1, frmarg:$frm), opcodestr, "$rd, $rs1, $frm"> { let rs2 = rs2val; } +multiclass FPUnaryOp_r_frm_m<bits<7> funct7, bits<5> rs2val, + list<ExtInfo_rr> Exts, string opcodestr> { + foreach Ext = Exts in + let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in + def Ext.Suffix : FPUnaryOp_r_frm<funct7, rs2val, Ext.RdTy, Ext.Rs1Ty, + opcodestr>; +} class FPUnaryOpDynFrmAlias<FPUnaryOp_r_frm Inst, string OpcodeStr, - RegisterClass rdty, RegisterClass rs1ty> + DAGOperand rdty, DAGOperand rs1ty> : InstAlias<OpcodeStr#" $rd, $rs1", (Inst rdty:$rd, rs1ty:$rs1, 0b111)>; +multiclass FPUnaryOpDynFrmAlias_m<FPUnaryOp_r_frm Inst, string OpcodeStr, + list<ExtInfo_rr> Exts> { + foreach Ext = Exts in + let Predicates = Ext.Predicates in + def : FPUnaryOpDynFrmAlias<!cast<FPUnaryOp_r_frm>(Inst#Ext.Suffix), + OpcodeStr, Ext.RdTy, Ext.Rs1Ty>; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in class FPCmp_rr<bits<7> funct7, bits<3> funct3, string opcodestr, - RegisterClass rty> + DAGOperand rty> : 
RVInstR<funct7, funct3, OPC_OP_FP, (outs GPR:$rd), (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2">; +multiclass FPCmp_rr_m<bits<7> funct7, bits<3> funct3, string opcodestr, + list<ExtInfo_r> Exts> { + foreach Ext = Exts in + let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in + def Ext.Suffix : FPCmp_rr<funct7, funct3, opcodestr, Ext.Reg>; +} //===----------------------------------------------------------------------===// // Instructions @@ -162,101 +290,100 @@ def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>; // reflecting the order these fields are specified in the instruction // encoding. def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>; +} // Predicates = [HasStdExtF] let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in { -def FMADD_S : FPFMA_rrr_frm<OPC_MADD, 0b00, "fmadd.s", FPR32>; -def FMSUB_S : FPFMA_rrr_frm<OPC_MSUB, 0b00, "fmsub.s", FPR32>; -def FNMSUB_S : FPFMA_rrr_frm<OPC_NMSUB, 0b00, "fnmsub.s", FPR32>; -def FNMADD_S : FPFMA_rrr_frm<OPC_NMADD, 0b00, "fnmadd.s", FPR32>; +defm FMADD_S : FPFMA_rrr_frm_m<OPC_MADD, 0b00, "fmadd.s", FINX>; +defm FMSUB_S : FPFMA_rrr_frm_m<OPC_MSUB, 0b00, "fmsub.s", FINX>; +defm FNMSUB_S : FPFMA_rrr_frm_m<OPC_NMSUB, 0b00, "fnmsub.s", FINX>; +defm FNMADD_S : FPFMA_rrr_frm_m<OPC_NMADD, 0b00, "fnmadd.s", FINX>; +} + +defm : FPFMADynFrmAlias_m<FMADD_S, "fmadd.s", FINX>; +defm : FPFMADynFrmAlias_m<FMSUB_S, "fmsub.s", FINX>; +defm : FPFMADynFrmAlias_m<FNMSUB_S, "fnmsub.s", FINX>; +defm : FPFMADynFrmAlias_m<FNMADD_S, "fnmadd.s", FINX>; + +let SchedRW = [WriteFALU32, ReadFALU32, ReadFALU32] in { +defm FADD_S : FPALU_rr_frm_m<0b0000000, "fadd.s", FINX>; +defm FSUB_S : FPALU_rr_frm_m<0b0000100, "fsub.s", FINX>; } +let SchedRW = [WriteFMul32, ReadFMul32, ReadFMul32] in +defm FMUL_S : FPALU_rr_frm_m<0b0001000, "fmul.s", FINX>; -def : FPFMADynFrmAlias<FMADD_S, "fmadd.s", FPR32>; -def : FPFMADynFrmAlias<FMSUB_S, "fmsub.s", FPR32>; -def : FPFMADynFrmAlias<FNMSUB_S, "fnmsub.s", FPR32>; -def : FPFMADynFrmAlias<FNMADD_S, "fnmadd.s", FPR32>; - -def FADD_S : FPALU_rr_frm<0b0000000, "fadd.s", FPR32>, - Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>; -def FSUB_S : FPALU_rr_frm<0b0000100, "fsub.s", FPR32>, - Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>; -def FMUL_S : FPALU_rr_frm<0b0001000, "fmul.s", FPR32>, - Sched<[WriteFMul32, ReadFMul32, ReadFMul32]>; -def FDIV_S : FPALU_rr_frm<0b0001100, "fdiv.s", FPR32>, - Sched<[WriteFDiv32, ReadFDiv32, ReadFDiv32]>; - -def : FPALUDynFrmAlias<FADD_S, "fadd.s", FPR32>; -def : FPALUDynFrmAlias<FSUB_S, "fsub.s", FPR32>; -def : FPALUDynFrmAlias<FMUL_S, "fmul.s", FPR32>; -def : FPALUDynFrmAlias<FDIV_S, "fdiv.s", FPR32>; - -def FSQRT_S : FPUnaryOp_r_frm<0b0101100, 0b00000, FPR32, FPR32, "fsqrt.s">, - Sched<[WriteFSqrt32, ReadFSqrt32]>; -def : FPUnaryOpDynFrmAlias<FSQRT_S, "fsqrt.s", FPR32, FPR32>; +let SchedRW = [WriteFDiv32, ReadFDiv32, ReadFDiv32] in +defm FDIV_S : FPALU_rr_frm_m<0b0001100, "fdiv.s", FINX>; + +defm : FPALUDynFrmAlias_m<FADD_S, "fadd.s", FINX>; +defm : FPALUDynFrmAlias_m<FSUB_S, "fsub.s", FINX>; +defm : FPALUDynFrmAlias_m<FMUL_S, "fmul.s", FINX>; +defm : FPALUDynFrmAlias_m<FDIV_S, "fdiv.s", FINX>; + +defm FSQRT_S : FPUnaryOp_r_frm_m<0b0101100, 0b00000, FFINX, "fsqrt.s">, + Sched<[WriteFSqrt32, ReadFSqrt32]>; +defm : FPUnaryOpDynFrmAlias_m<FSQRT_S, "fsqrt.s", FFINX>; let SchedRW = [WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32], mayRaiseFPException = 0 in { -def FSGNJ_S : FPALU_rr<0b0010000, 0b000, "fsgnj.s", FPR32>; -def FSGNJN_S : FPALU_rr<0b0010000, 0b001, "fsgnjn.s", FPR32>; -def FSGNJX_S 
: FPALU_rr<0b0010000, 0b010, "fsgnjx.s", FPR32>; +defm FSGNJ_S : FPALU_rr_m<0b0010000, 0b000, "fsgnj.s", FINX>; +defm FSGNJN_S : FPALU_rr_m<0b0010000, 0b001, "fsgnjn.s", FINX>; +defm FSGNJX_S : FPALU_rr_m<0b0010000, 0b010, "fsgnjx.s", FINX>; } let SchedRW = [WriteFMinMax32, ReadFMinMax32, ReadFMinMax32] in { -def FMIN_S : FPALU_rr<0b0010100, 0b000, "fmin.s", FPR32>; -def FMAX_S : FPALU_rr<0b0010100, 0b001, "fmax.s", FPR32>; +defm FMIN_S : FPALU_rr_m<0b0010100, 0b000, "fmin.s", FINX>; +defm FMAX_S : FPALU_rr_m<0b0010100, 0b001, "fmax.s", FINX>; } -def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, 0b00000, GPR, FPR32, "fcvt.w.s">, - Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>; -def : FPUnaryOpDynFrmAlias<FCVT_W_S, "fcvt.w.s", GPR, FPR32>; - -def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, 0b00001, GPR, FPR32, "fcvt.wu.s">, +defm FCVT_W_S : FPUnaryOp_r_frm_m<0b1100000, 0b00000, XFINX, "fcvt.w.s">, Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>; -def : FPUnaryOpDynFrmAlias<FCVT_WU_S, "fcvt.wu.s", GPR, FPR32>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_W_S, "fcvt.w.s", XFINX>; + +defm FCVT_WU_S : FPUnaryOp_r_frm_m<0b1100000, 0b00001, XFINX, "fcvt.wu.s">, + Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_WU_S, "fcvt.wu.s", XFINX>; let mayRaiseFPException = 0 in def FMV_X_W : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR32, "fmv.x.w">, Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>; let SchedRW = [WriteFCmp32, ReadFCmp32, ReadFCmp32] in { -def FEQ_S : FPCmp_rr<0b1010000, 0b010, "feq.s", FPR32>; -def FLT_S : FPCmp_rr<0b1010000, 0b001, "flt.s", FPR32>; -def FLE_S : FPCmp_rr<0b1010000, 0b000, "fle.s", FPR32>; +defm FEQ_S : FPCmp_rr_m<0b1010000, 0b010, "feq.s", FINX>; +defm FLT_S : FPCmp_rr_m<0b1010000, 0b001, "flt.s", FINX>; +defm FLE_S : FPCmp_rr_m<0b1010000, 0b000, "fle.s", FINX>; } let mayRaiseFPException = 0 in -def FCLASS_S : FPUnaryOp_r<0b1110000, 0b00000, 0b001, GPR, FPR32, "fclass.s">, - Sched<[WriteFClass32, ReadFClass32]>; - -def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, 0b00000, FPR32, GPR, "fcvt.s.w">, - Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>; -def : FPUnaryOpDynFrmAlias<FCVT_S_W, "fcvt.s.w", FPR32, GPR>; +defm FCLASS_S : FPUnaryOp_r_m<0b1110000, 0b00000, 0b001, XFINX, "fclass.s">, + Sched<[WriteFClass32, ReadFClass32]>; -def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, 0b00001, FPR32, GPR, "fcvt.s.wu">, +defm FCVT_S_W : FPUnaryOp_r_frm_m<0b1101000, 0b00000, FXINX, "fcvt.s.w">, Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>; -def : FPUnaryOpDynFrmAlias<FCVT_S_WU, "fcvt.s.wu", FPR32, GPR>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_S_W, "fcvt.s.w", FXINX>; + +defm FCVT_S_WU : FPUnaryOp_r_frm_m<0b1101000, 0b00001, FXINX, "fcvt.s.wu">, + Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_S_WU, "fcvt.s.wu", FXINX>; let mayRaiseFPException = 0 in def FMV_W_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR32, GPR, "fmv.w.x">, Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]>; -} // Predicates = [HasStdExtF] -let Predicates = [HasStdExtF, IsRV64] in { -def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, 0b00010, GPR, FPR32, "fcvt.l.s">, - Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>; -def : FPUnaryOpDynFrmAlias<FCVT_L_S, "fcvt.l.s", GPR, FPR32>; - -def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, 0b00011, GPR, FPR32, "fcvt.lu.s">, +defm FCVT_L_S : FPUnaryOp_r_frm_m<0b1100000, 0b00010, XFIN64X, "fcvt.l.s">, Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>; -def : FPUnaryOpDynFrmAlias<FCVT_LU_S, "fcvt.lu.s", GPR, FPR32>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_L_S, "fcvt.l.s", XFIN64X>; -def 
FCVT_S_L : FPUnaryOp_r_frm<0b1101000, 0b00010, FPR32, GPR, "fcvt.s.l">, - Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>; -def : FPUnaryOpDynFrmAlias<FCVT_S_L, "fcvt.s.l", FPR32, GPR>; +defm FCVT_LU_S : FPUnaryOp_r_frm_m<0b1100000, 0b00011, XFIN64X, "fcvt.lu.s">, + Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_LU_S, "fcvt.lu.s", XFIN64X>; -def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, 0b00011, FPR32, GPR, "fcvt.s.lu">, +defm FCVT_S_L : FPUnaryOp_r_frm_m<0b1101000, 0b00010, FXIN64X, "fcvt.s.l">, Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>; -def : FPUnaryOpDynFrmAlias<FCVT_S_LU, "fcvt.s.lu", FPR32, GPR>; -} // Predicates = [HasStdExtF, IsRV64] +defm : FPUnaryOpDynFrmAlias_m<FCVT_S_L, "fcvt.s.l", FXIN64X>; + +defm FCVT_S_LU : FPUnaryOp_r_frm_m<0b1101000, 0b00011, FXIN64X, "fcvt.s.lu">, + Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_S_LU, "fcvt.s.lu", FXIN64X>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) @@ -315,6 +442,16 @@ def PseudoQuietFLT_S : PseudoQuietFCMP<FPR32>; } } // Predicates = [HasStdExtF] +let Predicates = [HasStdExtZfinx] in { +def : InstAlias<"fabs.s $rd, $rs", (FSGNJX_S_INX FPR32INX:$rd, FPR32INX:$rs, FPR32INX:$rs)>; +def : InstAlias<"fneg.s $rd, $rs", (FSGNJN_S_INX FPR32INX:$rd, FPR32INX:$rs, FPR32INX:$rs)>; + +def : InstAlias<"fgt.s $rd, $rs, $rt", + (FLT_S_INX GPR:$rd, FPR32INX:$rt, FPR32INX:$rs), 0>; +def : InstAlias<"fge.s $rd, $rs, $rt", + (FLE_S_INX GPR:$rd, FPR32INX:$rt, FPR32INX:$rs), 0>; +} // Predicates = [HasStdExtZfinx] + //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index a2753c132354..631525484bd9 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -26,6 +26,62 @@ def riscv_fmv_x_anyexth : SDNode<"RISCVISD::FMV_X_ANYEXTH", SDT_RISCVFMV_X_ANYEXTH>; //===----------------------------------------------------------------------===// +// Operand and SDNode transformation definitions. 
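The fabs.s/fneg.s aliases above expand to sign-injection instructions with both source operands equal, and under Zfinx the f32 value lives in an integer register, so they reduce to plain bit operations. A standalone C++ sketch of that bit-level behavior (this models the instruction semantics, not the backend code):

#include <cassert>
#include <cstdint>
#include <cstring>

// fsgnjx.s rd, rs1, rs2: magnitude of rs1, sign = sign(rs1) XOR sign(rs2).
// With rs1 == rs2 the sign XORs with itself and clears, giving fabs.
uint32_t fsgnjx(uint32_t rs1, uint32_t rs2) {
  return (rs1 & 0x7fffffffu) | ((rs1 ^ rs2) & 0x80000000u);
}

// fsgnjn.s rd, rs1, rs2: magnitude of rs1, sign = NOT sign(rs2).
// With rs1 == rs2 the sign flips, giving fneg.
uint32_t fsgnjn(uint32_t rs1, uint32_t rs2) {
  return (rs1 & 0x7fffffffu) | (~rs2 & 0x80000000u);
}

int main() {
  float x = -2.5f;
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));

  uint32_t a = fsgnjx(bits, bits);
  uint32_t n = fsgnjn(bits, bits);
  float fabsx, fnegx;
  std::memcpy(&fabsx, &a, sizeof(fabsx));
  std::memcpy(&fnegx, &n, sizeof(fnegx));
  assert(fabsx == 2.5f && fnegx == 2.5f); // |x| and -x for x == -2.5
  return 0;
}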
+//===----------------------------------------------------------------------===// + +// Zhinxmin and Zhinx + +def FPR16INX : RegisterOperand<GPRF16> { + let ParserMatchClass = GPRAsFPR; + let DecoderMethod = "DecodeGPRRegisterClass"; +} + +def ZfhExt : ExtInfo<0, [HasStdExtZfh]>; +def Zfh64Ext : ExtInfo<0, [HasStdExtZfh, IsRV64]>; +def ZfhminExt : ExtInfo<0, [HasStdExtZfhOrZfhmin]>; +def ZhinxExt : ExtInfo<1, [HasStdExtZhinx]>; +def ZhinxminExt : ExtInfo<1, [HasStdExtZhinxOrZhinxmin]>; +def Zhinx64Ext : ExtInfo<1, [HasStdExtZhinx, IsRV64]>; + +def ZfhminDExt : ExtInfo<0, [HasStdExtZfhOrZfhmin, HasStdExtD]>; +def ZhinxminZdinxExt : ExtInfo<1, [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx]>; + +def H : ExtInfo_r<ZfhExt, FPR16>; +def H_INX : ExtInfo_r<ZhinxExt, FPR16INX>; + +def HH : ExtInfo_rr<ZfhExt, FPR16, FPR16>; +def HH_INX : ExtInfo_rr<ZhinxExt, FPR16INX, FPR16INX>; +def XH : ExtInfo_rr<ZfhExt, GPR, FPR16>; +def XH_INX : ExtInfo_rr<ZhinxExt, GPR, FPR16INX>; +def HX : ExtInfo_rr<ZfhExt, FPR16, GPR>; +def HX_INX : ExtInfo_rr<ZhinxExt, FPR16INX, GPR>; +def XH_64 : ExtInfo_rr<Zfh64Ext, GPR, FPR16>; +def HX_64 : ExtInfo_rr<Zfh64Ext, FPR16, GPR>; +def XH_INX_64 : ExtInfo_rr<Zhinx64Ext, GPR, FPR16INX>; +def HX_INX_64 : ExtInfo_rr<Zhinx64Ext, FPR16INX, GPR>; +def HFmin : ExtInfo_rr<ZfhminExt, FPR16, FPR32>; +def HF_INXmin : ExtInfo_rr<ZhinxminExt, FPR16INX, FPR32INX>; +def HF_INX : ExtInfo_rr<ZhinxExt, FPR16INX, FPR32INX>; +def FHmin : ExtInfo_rr<ZfhminExt, FPR32, FPR16>; +def FH_INXmin : ExtInfo_rr<ZhinxminExt, FPR32INX, FPR16INX>; +def FH_INX : ExtInfo_rr<ZhinxExt, FPR32INX, FPR16INX>; +def DHmin : ExtInfo_rr<ZfhminDExt, FPR64, FPR16>; +def DH_INXmin : ExtInfo_rr<ZhinxminZdinxExt, FPR64INX, FPR16INX>; +def HDmin : ExtInfo_rr<ZfhminDExt, FPR16, FPR64>; +def HD_INXmin : ExtInfo_rr<ZhinxminZdinxExt, FPR16INX, FPR64INX>; + +defvar HINX = [H, H_INX]; +defvar HHINX = [HH, HH_INX]; +defvar XHINX = [XH, XH_INX]; +defvar HXINX = [HX, HX_INX]; +defvar XHIN64X = [XH_64, XH_INX_64]; +defvar HXIN64X = [HX_64, HX_INX_64]; +defvar HFINXmin = [HFmin, HF_INXmin]; +defvar FHINXmin = [FHmin, FH_INXmin]; +defvar DHINXmin = [DHmin, DH_INXmin]; +defvar HDINXmin = [HDmin, HD_INXmin]; + +//===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -38,74 +94,73 @@ def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>; def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>; } // Predicates = [HasStdExtZfhOrZfhmin] -let Predicates = [HasStdExtZfh] in { let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in { -def FMADD_H : FPFMA_rrr_frm<OPC_MADD, 0b10, "fmadd.h", FPR16>; -def FMSUB_H : FPFMA_rrr_frm<OPC_MSUB, 0b10, "fmsub.h", FPR16>; -def FNMSUB_H : FPFMA_rrr_frm<OPC_NMSUB, 0b10, "fnmsub.h", FPR16>; -def FNMADD_H : FPFMA_rrr_frm<OPC_NMADD, 0b10, "fnmadd.h", FPR16>; +defm FMADD_H : FPFMA_rrr_frm_m<OPC_MADD, 0b10, "fmadd.h", HINX>; +defm FMSUB_H : FPFMA_rrr_frm_m<OPC_MSUB, 0b10, "fmsub.h", HINX>; +defm FNMSUB_H : FPFMA_rrr_frm_m<OPC_NMSUB, 0b10, "fnmsub.h", HINX>; +defm FNMADD_H : FPFMA_rrr_frm_m<OPC_NMADD, 0b10, "fnmadd.h", HINX>; } -def : FPFMADynFrmAlias<FMADD_H, "fmadd.h", FPR16>; -def : FPFMADynFrmAlias<FMSUB_H, "fmsub.h", FPR16>; -def : FPFMADynFrmAlias<FNMSUB_H, "fnmsub.h", FPR16>; -def : FPFMADynFrmAlias<FNMADD_H, "fnmadd.h", FPR16>; - -def FADD_H : FPALU_rr_frm<0b0000010, "fadd.h", FPR16>, - Sched<[WriteFALU16, ReadFALU16, ReadFALU16]>; -def FSUB_H : FPALU_rr_frm<0b0000110, "fsub.h", FPR16>, - 
Sched<[WriteFALU16, ReadFALU16, ReadFALU16]>; -def FMUL_H : FPALU_rr_frm<0b0001010, "fmul.h", FPR16>, - Sched<[WriteFMul16, ReadFMul16, ReadFMul16]>; -def FDIV_H : FPALU_rr_frm<0b0001110, "fdiv.h", FPR16>, - Sched<[WriteFDiv16, ReadFDiv16, ReadFDiv16]>; - -def : FPALUDynFrmAlias<FADD_H, "fadd.h", FPR16>; -def : FPALUDynFrmAlias<FSUB_H, "fsub.h", FPR16>; -def : FPALUDynFrmAlias<FMUL_H, "fmul.h", FPR16>; -def : FPALUDynFrmAlias<FDIV_H, "fdiv.h", FPR16>; - -def FSQRT_H : FPUnaryOp_r_frm<0b0101110, 0b00000, FPR16, FPR16, "fsqrt.h">, - Sched<[WriteFSqrt16, ReadFSqrt16]>; -def : FPUnaryOpDynFrmAlias<FSQRT_H, "fsqrt.h", FPR16, FPR16>; +defm : FPFMADynFrmAlias_m<FMADD_H, "fmadd.h", HINX>; +defm : FPFMADynFrmAlias_m<FMSUB_H, "fmsub.h", HINX>; +defm : FPFMADynFrmAlias_m<FNMSUB_H, "fnmsub.h", HINX>; +defm : FPFMADynFrmAlias_m<FNMADD_H, "fnmadd.h", HINX>; + +let SchedRW = [WriteFALU16, ReadFALU16, ReadFALU16] in { +defm FADD_H : FPALU_rr_frm_m<0b0000010, "fadd.h", HINX>; +defm FSUB_H : FPALU_rr_frm_m<0b0000110, "fsub.h", HINX>; +} +let SchedRW = [WriteFMul16, ReadFMul16, ReadFMul16] in +defm FMUL_H : FPALU_rr_frm_m<0b0001010, "fmul.h", HINX>; + +let SchedRW = [WriteFDiv16, ReadFDiv16, ReadFDiv16] in +defm FDIV_H : FPALU_rr_frm_m<0b0001110, "fdiv.h", HINX>; + +defm : FPALUDynFrmAlias_m<FADD_H, "fadd.h", HINX>; +defm : FPALUDynFrmAlias_m<FSUB_H, "fsub.h", HINX>; +defm : FPALUDynFrmAlias_m<FMUL_H, "fmul.h", HINX>; +defm : FPALUDynFrmAlias_m<FDIV_H, "fdiv.h", HINX>; + +defm FSQRT_H : FPUnaryOp_r_frm_m<0b0101110, 0b00000, HHINX, "fsqrt.h">, + Sched<[WriteFSqrt16, ReadFSqrt16]>; +defm : FPUnaryOpDynFrmAlias_m<FSQRT_H, "fsqrt.h", HHINX>; let SchedRW = [WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16], mayRaiseFPException = 0 in { -def FSGNJ_H : FPALU_rr<0b0010010, 0b000, "fsgnj.h", FPR16>; -def FSGNJN_H : FPALU_rr<0b0010010, 0b001, "fsgnjn.h", FPR16>; -def FSGNJX_H : FPALU_rr<0b0010010, 0b010, "fsgnjx.h", FPR16>; +defm FSGNJ_H : FPALU_rr_m<0b0010010, 0b000, "fsgnj.h", HINX>; +defm FSGNJN_H : FPALU_rr_m<0b0010010, 0b001, "fsgnjn.h", HINX>; +defm FSGNJX_H : FPALU_rr_m<0b0010010, 0b010, "fsgnjx.h", HINX>; } let SchedRW = [WriteFMinMax16, ReadFMinMax16, ReadFMinMax16] in { -def FMIN_H : FPALU_rr<0b0010110, 0b000, "fmin.h", FPR16>; -def FMAX_H : FPALU_rr<0b0010110, 0b001, "fmax.h", FPR16>; +defm FMIN_H : FPALU_rr_m<0b0010110, 0b000, "fmin.h", HINX>; +defm FMAX_H : FPALU_rr_m<0b0010110, 0b001, "fmax.h", HINX>; } -def FCVT_W_H : FPUnaryOp_r_frm<0b1100010, 0b00000, GPR, FPR16, "fcvt.w.h">, - Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>; -def : FPUnaryOpDynFrmAlias<FCVT_W_H, "fcvt.w.h", GPR, FPR16>; - -def FCVT_WU_H : FPUnaryOp_r_frm<0b1100010, 0b00001, GPR, FPR16, "fcvt.wu.h">, +defm FCVT_W_H : FPUnaryOp_r_frm_m<0b1100010, 0b00000, XHINX, "fcvt.w.h">, Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>; -def : FPUnaryOpDynFrmAlias<FCVT_WU_H, "fcvt.wu.h", GPR, FPR16>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_W_H, "fcvt.w.h", XHINX>; -def FCVT_H_W : FPUnaryOp_r_frm<0b1101010, 0b00000, FPR16, GPR, "fcvt.h.w">, - Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>; -def : FPUnaryOpDynFrmAlias<FCVT_H_W, "fcvt.h.w", FPR16, GPR>; +defm FCVT_WU_H : FPUnaryOp_r_frm_m<0b1100010, 0b00001, XHINX, "fcvt.wu.h">, + Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_WU_H, "fcvt.wu.h", XHINX>; -def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, 0b00001, FPR16, GPR, "fcvt.h.wu">, +defm FCVT_H_W : FPUnaryOp_r_frm_m<0b1101010, 0b00000, HXINX, "fcvt.h.w">, Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>; -def : FPUnaryOpDynFrmAlias<FCVT_H_WU, 
"fcvt.h.wu", FPR16, GPR>; -} // Predicates = [HasStdExtZfh] +defm : FPUnaryOpDynFrmAlias_m<FCVT_H_W, "fcvt.h.w", HXINX>; -let Predicates = [HasStdExtZfhOrZfhmin] in { -def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, 0b00000, FPR16, FPR32, "fcvt.h.s">, - Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>; -def : FPUnaryOpDynFrmAlias<FCVT_H_S, "fcvt.h.s", FPR16, FPR32>; +defm FCVT_H_WU : FPUnaryOp_r_frm_m<0b1101010, 0b00001, HXINX, "fcvt.h.wu">, + Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_H_WU, "fcvt.h.wu", HXINX>; -def FCVT_S_H : FPUnaryOp_r<0b0100000, 0b00010, 0b000, FPR32, FPR16, "fcvt.s.h">, +defm FCVT_H_S : FPUnaryOp_r_frm_m<0b0100010, 0b00000, HFINXmin, "fcvt.h.s">, + Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_H_S, "fcvt.h.s", HFINXmin>; + +defm FCVT_S_H : FPUnaryOp_r_m<0b0100000, 0b00010, 0b000, FHINXmin, "fcvt.s.h">, Sched<[WriteFCvtF16ToF32, ReadFCvtF16ToF32]>; +let Predicates = [HasStdExtZfhOrZfhmin] in { let mayRaiseFPException = 0 in def FMV_X_H : FPUnaryOp_r<0b1110010, 0b00000, 0b000, GPR, FPR16, "fmv.x.h">, Sched<[WriteFMovF16ToI16, ReadFMovF16ToI16]>; @@ -115,45 +170,38 @@ def FMV_H_X : FPUnaryOp_r<0b1111010, 0b00000, 0b000, FPR16, GPR, "fmv.h.x">, Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]>; } // Predicates = [HasStdExtZfhOrZfhmin] -let Predicates = [HasStdExtZfh] in { - let SchedRW = [WriteFCmp16, ReadFCmp16, ReadFCmp16] in { -def FEQ_H : FPCmp_rr<0b1010010, 0b010, "feq.h", FPR16>; -def FLT_H : FPCmp_rr<0b1010010, 0b001, "flt.h", FPR16>; -def FLE_H : FPCmp_rr<0b1010010, 0b000, "fle.h", FPR16>; +defm FEQ_H : FPCmp_rr_m<0b1010010, 0b010, "feq.h", HINX>; +defm FLT_H : FPCmp_rr_m<0b1010010, 0b001, "flt.h", HINX>; +defm FLE_H : FPCmp_rr_m<0b1010010, 0b000, "fle.h", HINX>; } let mayRaiseFPException = 0 in -def FCLASS_H : FPUnaryOp_r<0b1110010, 0b00000, 0b001, GPR, FPR16, "fclass.h">, - Sched<[WriteFClass16, ReadFClass16]>; -} // Predicates = [HasStdExtZfh] - -let Predicates = [HasStdExtZfh, IsRV64] in { -def FCVT_L_H : FPUnaryOp_r_frm<0b1100010, 0b00010, GPR, FPR16, "fcvt.l.h">, - Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>; -def : FPUnaryOpDynFrmAlias<FCVT_L_H, "fcvt.l.h", GPR, FPR16>; +defm FCLASS_H : FPUnaryOp_r_m<0b1110010, 0b00000, 0b001, XHINX, "fclass.h">, + Sched<[WriteFClass16, ReadFClass16]>; -def FCVT_LU_H : FPUnaryOp_r_frm<0b1100010, 0b00011, GPR, FPR16, "fcvt.lu.h">, +defm FCVT_L_H : FPUnaryOp_r_frm_m<0b1100010, 0b00010, XHIN64X, "fcvt.l.h">, Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>; -def : FPUnaryOpDynFrmAlias<FCVT_LU_H, "fcvt.lu.h", GPR, FPR16>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_L_H, "fcvt.l.h", XHIN64X>; -def FCVT_H_L : FPUnaryOp_r_frm<0b1101010, 0b00010, FPR16, GPR, "fcvt.h.l">, - Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>; -def : FPUnaryOpDynFrmAlias<FCVT_H_L, "fcvt.h.l", FPR16, GPR>; +defm FCVT_LU_H : FPUnaryOp_r_frm_m<0b1100010, 0b00011, XHIN64X, "fcvt.lu.h">, + Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>; +defm : FPUnaryOpDynFrmAlias_m<FCVT_LU_H, "fcvt.lu.h", XHIN64X>; -def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, 0b00011, FPR16, GPR, "fcvt.h.lu">, +defm FCVT_H_L : FPUnaryOp_r_frm_m<0b1101010, 0b00010, HXIN64X, "fcvt.h.l">, Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>; -def : FPUnaryOpDynFrmAlias<FCVT_H_LU, "fcvt.h.lu", FPR16, GPR>; -} // Predicates = [HasStdExtZfh, IsRV64] +defm : FPUnaryOpDynFrmAlias_m<FCVT_H_L, "fcvt.h.l", HXIN64X>; -let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in { -def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, 0b00001, FPR16, FPR64, "fcvt.h.d">, - 
Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>;
-def : FPUnaryOpDynFrmAlias<FCVT_H_D, "fcvt.h.d", FPR16, FPR64>;
+defm FCVT_H_LU : FPUnaryOp_r_frm_m<0b1101010, 0b00011, HXIN64X, "fcvt.h.lu">,
+                 Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_H_LU, "fcvt.h.lu", HXIN64X>;
 
-def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b00010, 0b000, FPR64, FPR16, "fcvt.d.h">,
-               Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>;
-} // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD]
+defm FCVT_H_D : FPUnaryOp_r_frm_m<0b0100010, 0b00001, HDINXmin, "fcvt.h.d">,
+                Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>;
+defm : FPUnaryOpDynFrmAlias_m<FCVT_H_D, "fcvt.h.d", HDINXmin>;
+
+defm FCVT_D_H : FPUnaryOp_r_m<0b0100001, 0b00010, 0b000, DHINXmin, "fcvt.d.h">,
+                Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>;
 
 //===----------------------------------------------------------------------===//
 // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
@@ -186,6 +234,17 @@ def PseudoQuietFLT_H : PseudoQuietFCMP<FPR16>;
 }
 } // Predicates = [HasStdExtZfhOrZfhmin]
 
+let Predicates = [HasStdExtZhinx] in {
+def : InstAlias<"fmv.h $rd, $rs", (FSGNJ_H_INX FPR16INX:$rd, FPR16INX:$rs, FPR16INX:$rs)>;
+def : InstAlias<"fabs.h $rd, $rs", (FSGNJX_H_INX FPR16INX:$rd, FPR16INX:$rs, FPR16INX:$rs)>;
+def : InstAlias<"fneg.h $rd, $rs", (FSGNJN_H_INX FPR16INX:$rd, FPR16INX:$rs, FPR16INX:$rs)>;
+
+def : InstAlias<"fgt.h $rd, $rs, $rt",
+                (FLT_H_INX GPR:$rd, FPR16INX:$rt, FPR16INX:$rs), 0>;
+def : InstAlias<"fge.h $rd, $rs, $rt",
+                (FLE_H_INX GPR:$rd, FPR16INX:$rt, FPR16INX:$rs), 0>;
+} // Predicates = [HasStdExtZhinx]
+
 //===----------------------------------------------------------------------===//
 // Pseudo-instructions and codegen patterns
 //===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 8c1c03b51c24..b06af3787b5d 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -66,6 +66,7 @@ def sub_vrm1_5 : ComposedSubRegIndex<sub_vrm2_2, sub_vrm1_1>;
 def sub_vrm1_6 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_0>;
 def sub_vrm1_7 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_1>;
 
+def sub_32_hi : SubRegIndex<32, 32>;
 } // Namespace = "RISCV"
 
 // Integer registers
@@ -534,6 +535,35 @@ def VMV0 : RegisterClass<"RISCV", VMaskVTs, 64, (add V0)> {
   let Size = 64;
 }
 
+let RegInfos = XLenRI in {
+def GPRF16 : RegisterClass<"RISCV", [f16], 16, (add GPR)>;
+def GPRF32 : RegisterClass<"RISCV", [f32], 32, (add GPR)>;
+def GPRF64 : RegisterClass<"RISCV", [f64], 64, (add GPR)>;
+} // RegInfos = XLenRI
+
+let RegAltNameIndices = [ABIRegAltName] in {
+  foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
+                   24, 26, 28, 30] in {
+    defvar Reg = !cast<Register>("X"#Index);
+    def X#Index#_PD : RISCVRegWithSubRegs<Index, Reg.AsmName,
+                                          [!cast<Register>("X"#Index),
+                                           !cast<Register>("X"#!add(Index, 1))],
+                                          Reg.AltNames> {
+      let SubRegIndices = [sub_32, sub_32_hi];
+    }
+  }
+}
+
+let RegInfos = RegInfoByHwMode<[RV64], [RegInfo<64, 64, 64>]> in
+def GPRPF64 : RegisterClass<"RISCV", [f64], 64, (add
+    X10_PD, X12_PD, X14_PD, X16_PD,
+    X6_PD,
+    X28_PD, X30_PD,
+    X8_PD,
+    X18_PD, X20_PD, X22_PD, X24_PD, X26_PD,
+    X0_PD, X2_PD, X4_PD
+)>;
+
 // The register class is added for inline assembly for vector mask types.
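GPRPF64 above gives RV32 Zdinx a way to model an f64 held in an even/odd pair of 32-bit GPRs, with sub_32 and sub_32_hi naming the low and high halves of each X{2n}_PD register. The decoder side (DecodeGPRPF64RegisterClass earlier in this patch) only accepts even register numbers. A small self-contained C++ sketch of that pairing rule, using illustrative types rather than the MC layer:

#include <cassert>
#include <cstdint>
#include <optional>

struct RegPair {
  unsigned Lo; // X(2n):   low 32 bits of the f64 (sub_32)
  unsigned Hi; // X(2n+1): high 32 bits (sub_32_hi)
};

// Mirrors the check in DecodeGPRPF64RegisterClass: the encoding names only
// the even register of the pair, and odd or out-of-range numbers are invalid.
std::optional<RegPair> decodeGPRPair(uint64_t RegNo) {
  if (RegNo >= 32 || (RegNo & 1))
    return std::nullopt;
  return RegPair{static_cast<unsigned>(RegNo),
                 static_cast<unsigned>(RegNo + 1)};
}

int main() {
  auto P = decodeGPRPair(10);              // x10/x11 hold one f64
  assert(P && P->Lo == 10 && P->Hi == 11);
  assert(!decodeGPRPair(11));              // odd register: no valid pair
  assert(!decodeGPRPair(32));              // out of range
  return 0;
}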
def VM : VReg<VMaskVTs, (add (sequence "V%u", 8, 31), diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h index 34c6e8e684ac..c8237e60b4e4 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -83,6 +83,10 @@ private: bool HasStdExtZve64d = false; bool HasStdExtZfhmin = false; bool HasStdExtZfh = false; + bool HasStdExtZfinx = false; + bool HasStdExtZdinx = false; + bool HasStdExtZhinxmin = false; + bool HasStdExtZhinx = false; bool HasStdExtZbkb = false; bool HasStdExtZbkc = false; bool HasStdExtZbkx = false; @@ -170,6 +174,10 @@ public: bool hasStdExtZvl() const { return ZvlLen != ExtZvl::NotSet; } bool hasStdExtZfhmin() const { return HasStdExtZfhmin; } bool hasStdExtZfh() const { return HasStdExtZfh; } + bool hasStdExtZfinx() const { return HasStdExtZfinx; } + bool hasStdExtZdinx() const { return HasStdExtZdinx; } + bool hasStdExtZhinxmin() const { return HasStdExtZhinxmin; } + bool hasStdExtZhinx() const { return HasStdExtZhinx; } bool hasStdExtZbkb() const { return HasStdExtZbkb; } bool hasStdExtZbkc() const { return HasStdExtZbkc; } bool hasStdExtZbkx() const { return HasStdExtZbkx; } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.td b/contrib/llvm-project/llvm/lib/Target/X86/X86.td index 380507308c3d..bafba2ee09c3 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.td @@ -1169,6 +1169,8 @@ def ProcessorFeatures { TuningFastBEXTR, TuningFast15ByteNOP, TuningBranchFusion, + TuningFastScalarFSQRT, + TuningFastVectorFSQRT, TuningFastScalarShiftMasks, TuningFastMOVBE, TuningSlowSHLD, diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp index 51f2ced321bb..0971a0e33f98 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -99,7 +99,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() || MFI.hasStackMap() || MFI.hasPatchPoint() || - MFI.hasCopyImplyingStackAdjustment()); + (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment())); } static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) { @@ -1289,6 +1289,9 @@ bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const { return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone); } +/// Return true if we need to use the restricted Windows x64 prologue and +/// epilogue code patterns that can be described with WinCFI (.seh_* +/// directives). bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const { return MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp index 824ea7bbc843..77c2e7d16990 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33418,6 +33418,20 @@ bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const { return !(VT1 == MVT::i32 && VT2 == MVT::i16); } +bool X86TargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode, + EVT VT) const { + // TODO: This is too general. 
There are cases where pre-AVX512 codegen would + // benefit. The transform may also be profitable for scalar code. + if (!Subtarget.hasAVX512()) + return false; + if (!Subtarget.hasVLX() && !VT.is512BitVector()) + return false; + if (!VT.isVector()) + return false; + + return true; +} + /// Targets can use this to indicate that they only support *some* /// VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values @@ -43108,38 +43122,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, } } - // If this extract is from a loaded vector value and will be used as an - // integer, that requires a potentially expensive XMM -> GPR transfer. - // Additionally, if we can convert to a scalar integer load, that will likely - // be folded into a subsequent integer op. - // Note: Unlike the related fold for this in DAGCombiner, this is not limited - // to a single-use of the loaded vector. For the reasons above, we - // expect this to be profitable even if it creates an extra load. - bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) { - return Use->getOpcode() == ISD::STORE || - Use->getOpcode() == ISD::INSERT_VECTOR_ELT || - Use->getOpcode() == ISD::SCALAR_TO_VECTOR; - }); - auto *LoadVec = dyn_cast<LoadSDNode>(InputVector); - if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() && - SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() && - !LikelyUsedAsVector) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue NewPtr = - TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx); - unsigned PtrOff = VT.getSizeInBits() * CIdx->getZExtValue() / 8; - MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff); - Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff); - SDValue Load = - DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment, - LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo()); - SDValue Chain = Load.getValue(1); - SDValue From[] = {SDValue(N, 0), SDValue(LoadVec, 1)}; - SDValue To[] = {Load, Chain}; - DAG.ReplaceAllUsesOfValuesWith(From, To, 2); - return SDValue(N, 0); - } - return SDValue(); } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h index 3f6d567d3f4d..50c7e2c319f6 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h @@ -1288,6 +1288,9 @@ namespace llvm { /// from i32 to i8 but not from i32 to i16. bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; + bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, + EVT VT) const override; + /// Given an intrinsic, checks if on the target the intrinsic will need to map /// to a MemIntrinsicNode (touches memory). If this is the case, it returns /// true and stores the intrinsic information into the IntrinsicInfo that was diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 2d88e329e093..4e4f768ed2cb 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -236,7 +236,8 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr, } // Ensure the result has the requested type. 
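shouldFoldSelectWithIdentityConstant above gates a combine of the shape binop(select(cond, x, identity), y) -> select(cond, binop(x, y), y); with AVX512 masking (and VLX for narrower vectors) the folded form becomes a single predicated instruction. A scalar C++ sketch of why the rewrite is value-preserving for add, whose identity is 0 (the names here are illustrative, not the DAG combine itself):

#include <cassert>

// Unfolded: select first, then an unconditional add.
int unfolded(bool cond, int x, int y) { return (cond ? x : 0) + y; }

// Folded: when cond is false, adding the identity leaves y unchanged,
// so the select can absorb the add. On AVX512 vectors this is one
// masked add with y as the pass-through operand.
int folded(bool cond, int x, int y) { return cond ? x + y : y; }

int main() {
  const int xs[] = {-3, 0, 7};
  const int ys[] = {-1, 0, 5};
  for (int c = 0; c <= 1; ++c)
    for (int x : xs)
      for (int y : ys)
        assert(unfolded(c != 0, x, y) == folded(c != 0, x, y));
  return 0;
}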
-  Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast");
+  Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, ResTy,
+                                                Ptr->getName() + ".cast");
 
   LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n");
   return Ptr;
@@ -2691,40 +2692,38 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
       return true;
     };
 
-    auto InspectReturnInstForUB =
-        [&](Value &V, const SmallSetVector<ReturnInst *, 4> RetInsts) {
-          // Check if a return instruction always causes UB or not
-          // Note: It is guaranteed that the returned position of the anchor
-          //       scope has noundef attribute when this is called.
-          // We also ensure the return position is not "assumed dead"
-          // because the returned value was then potentially simplified to
-          // `undef` in AAReturnedValues without removing the `noundef`
-          // attribute yet.
-
-          // When the returned position has noundef attribute, UB occurs in the
-          // following cases.
-          //   (1) Returned value is known to be undef.
-          //   (2) The value is known to be a null pointer and the returned
-          //       position has nonnull attribute (because the returned value is
-          //       poison).
-          bool FoundUB = false;
-          if (isa<UndefValue>(V)) {
-            FoundUB = true;
-          } else {
-            if (isa<ConstantPointerNull>(V)) {
-              auto &NonNullAA = A.getAAFor<AANonNull>(
-                  *this, IRPosition::returned(*getAnchorScope()),
-                  DepClassTy::NONE);
-              if (NonNullAA.isKnownNonNull())
-                FoundUB = true;
-            }
-          }
+    auto InspectReturnInstForUB = [&](Instruction &I) {
+      auto &RI = cast<ReturnInst>(I);
+      // Either we stopped and the appropriate action was taken,
+      // or we got back a simplified return value to continue.
+      Optional<Value *> SimplifiedRetValue =
+          stopOnUndefOrAssumed(A, RI.getReturnValue(), &I);
+      if (!SimplifiedRetValue.hasValue() || !SimplifiedRetValue.getValue())
+        return true;
-          if (FoundUB)
-            for (ReturnInst *RI : RetInsts)
-              KnownUBInsts.insert(RI);
-          return true;
-        };
+      // Check if a return instruction always causes UB or not
+      // Note: It is guaranteed that the returned position of the anchor
+      //       scope has noundef attribute when this is called.
+      // We also ensure the return position is not "assumed dead"
+      // because the returned value was then potentially simplified to
+      // `undef` in AAReturnedValues without removing the `noundef`
+      // attribute yet.
+
+      // When the returned position has noundef attribute, UB occurs in the
+      // following cases.
+      //   (1) Returned value is known to be undef.
+      //   (2) The value is known to be a null pointer and the returned
+      //       position has nonnull attribute (because the returned value is
+      //       poison).
+      if (isa<ConstantPointerNull>(*SimplifiedRetValue)) {
+        auto &NonNullAA = A.getAAFor<AANonNull>(
+            *this, IRPosition::returned(*getAnchorScope()), DepClassTy::NONE);
+        if (NonNullAA.isKnownNonNull())
+          KnownUBInsts.insert(&I);
+      }
+
+      return true;
+    };
 
     bool UsedAssumedInformation = false;
     A.checkForAllInstructions(InspectMemAccessInstForUB, *this,
@@ -2747,8 +2746,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
         auto &RetPosNoUndefAA =
            A.getAAFor<AANoUndef>(*this, ReturnIRP, DepClassTy::NONE);
         if (RetPosNoUndefAA.isKnownNoUndef())
-          A.checkForAllReturnedValuesAndReturnInsts(InspectReturnInstForUB,
-                                                    *this);
+          A.checkForAllInstructions(InspectReturnInstForUB, *this,
+                                    {Instruction::Ret}, UsedAssumedInformation,
+                                    /* CheckBBLivenessOnly */ true);
       }
     }
@@ -6749,8 +6749,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
       Type *PrivPtrType = PrivType->getPointerTo();
       if (Base->getType() != PrivPtrType)
-        Base = BitCastInst::CreateBitOrPointerCast(Base, PrivPtrType, "",
-                                                   ACS.getInstruction());
+        Base = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
+            Base, PrivPtrType, "", ACS.getInstruction());
 
       // Traverse the type, build GEPs and loads.
       if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
@@ -6817,14 +6817,16 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
                       Function &ReplacementFn, Function::arg_iterator ArgIt) {
           BasicBlock &EntryBB = ReplacementFn.getEntryBlock();
           Instruction *IP = &*EntryBB.getFirstInsertionPt();
-          Instruction *AI = new AllocaInst(PrivatizableType.getValue(), 0,
+          const DataLayout &DL = IP->getModule()->getDataLayout();
+          unsigned AS = DL.getAllocaAddrSpace();
+          Instruction *AI = new AllocaInst(PrivatizableType.getValue(), AS,
                                            Arg->getName() + ".priv", IP);
           createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
                                ArgIt->getArgNo(), *IP);
 
           if (AI->getType() != Arg->getType())
-            AI =
-                BitCastInst::CreateBitOrPointerCast(AI, Arg->getType(), "", IP);
+            AI = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
+                AI, Arg->getType(), "", IP);
           Arg->replaceAllUsesWith(AI);
 
           for (CallInst *CI : TailCalls)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 2d765fb6ce6d..520b6ebf9e74 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -1458,7 +1458,6 @@ private:
       case Intrinsic::nvvm_barrier0_and:
       case Intrinsic::nvvm_barrier0_or:
      case Intrinsic::nvvm_barrier0_popc:
-      case Intrinsic::amdgcn_s_barrier:
         return true;
       default:
         break;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 8f5933b7bd71..ddc747a2ca29 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -655,10 +655,13 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
   case Instruction::IntToPtr: {
     assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI));
     Value *Src = cast<Operator>(I->getOperand(0))->getOperand(0);
-    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
-    if (Src->getType() != NewPtrType)
-      return new BitCastInst(Src, NewPtrType);
-    return Src;
+    if (Src->getType() == NewPtrType)
+      return Src;
+
+    // If we had a no-op inttoptr/ptrtoint pair, we may still have inferred a
+    // source address space from a generic pointer source and need to insert
+    // a cast back.
+    return CastInst::CreatePointerBitCastOrAddrSpaceCast(Src, NewPtrType);
   }
   default:
     llvm_unreachable("Unexpected opcode");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3290439ecd07..21c16f07e237 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1701,6 +1701,11 @@ public:
 private:
   unsigned NumPredStores = 0;
 
+  /// Convenience function that returns the value of vscale_range iff
+  /// vscale_range.min == vscale_range.max, and otherwise returns the value
+  /// returned by the corresponding TLI method.
+  Optional<unsigned> getVScaleForTuning() const;
+
   /// \return An upper bound for the vectorization factors for both
   /// fixed and scalable vectorization, where the minimum-known number of
   /// elements is a power-of-2 larger than zero. If scalable vectorization is
@@ -5600,6 +5605,18 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
   return MaxVF;
 }
 
+Optional<unsigned> LoopVectorizationCostModel::getVScaleForTuning() const {
+  if (TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
+    auto Attr = TheFunction->getFnAttribute(Attribute::VScaleRange);
+    auto Min = Attr.getVScaleRangeMin();
+    auto Max = Attr.getVScaleRangeMax();
+    if (Max && Min == Max)
+      return Max;
+  }
+
+  return TTI.getVScaleForTuning();
+}
+
 bool LoopVectorizationCostModel::isMoreProfitable(
     const VectorizationFactor &A, const VectorizationFactor &B) const {
   InstructionCost CostA = A.Cost;
@@ -5624,7 +5641,7 @@ bool LoopVectorizationCostModel::isMoreProfitable(
     // Improve estimate for the vector width if it is scalable.
     unsigned EstimatedWidthA = A.Width.getKnownMinValue();
     unsigned EstimatedWidthB = B.Width.getKnownMinValue();
-    if (Optional<unsigned> VScale = TTI.getVScaleForTuning()) {
+    if (Optional<unsigned> VScale = getVScaleForTuning()) {
       if (A.Width.isScalable())
         EstimatedWidthA *= VScale.getValue();
       if (B.Width.isScalable())
         EstimatedWidthB *= VScale.getValue();
@@ -5673,7 +5690,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
 #ifndef NDEBUG
     unsigned AssumedMinimumVscale = 1;
-    if (Optional<unsigned> VScale = TTI.getVScaleForTuning())
+    if (Optional<unsigned> VScale = getVScaleForTuning())
       AssumedMinimumVscale = VScale.getValue();
     unsigned Width = Candidate.Width.isScalable()
@@ -5885,8 +5902,20 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
     return Result;
   }
 
+  // If MainLoopVF = vscale x 2, and vscale is expected to be 4, then we know
+  // the main loop handles 8 lanes per iteration. We could still benefit from
+  // vectorizing the epilogue loop with VF=4.
+  ElementCount EstimatedRuntimeVF = MainLoopVF;
+  if (MainLoopVF.isScalable()) {
+    EstimatedRuntimeVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
+    if (Optional<unsigned> VScale = getVScaleForTuning())
+      EstimatedRuntimeVF *= VScale.getValue();
+  }
+
   for (auto &NextVF : ProfitableVFs)
-    if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&
+    if (((!NextVF.Width.isScalable() && MainLoopVF.isScalable() &&
+          ElementCount::isKnownLT(NextVF.Width, EstimatedRuntimeVF)) ||
+         ElementCount::isKnownLT(NextVF.Width, MainLoopVF)) &&
        (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) &&
        LVP.hasPlanWithVF(NextVF.Width))
      Result = NextVF;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 15b349f53fd9..25bf69729c70 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -471,36 +471,17 @@ static bool isValidForAlternation(unsigned Opcode) {
   return true;
 }
 
-static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
-                                       unsigned BaseIndex = 0);
-
-/// Checks if the provided operands of 2 cmp instructions are compatible, i.e.
-/// compatible instructions or constants, or just some other regular values.
-static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
-                                Value *Op1) {
-  return (isConstant(BaseOp0) && isConstant(Op0)) ||
-         (isConstant(BaseOp1) && isConstant(Op1)) ||
-         (!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
-          !isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
-         getSameOpcode({BaseOp0, Op0}).getOpcode() ||
-         getSameOpcode({BaseOp1, Op1}).getOpcode();
-}
-
 /// \returns analysis of the Instructions in \p VL described in
 /// InstructionsState, the Opcode that we suppose the whole list
 /// could be vectorized even if its structure is diverse.
 static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
-                                       unsigned BaseIndex) {
+                                       unsigned BaseIndex = 0) {
   // Make sure these are all Instructions.
   if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
     return InstructionsState(VL[BaseIndex], nullptr, nullptr);
 
   bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
   bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
-  bool IsCmpOp = isa<CmpInst>(VL[BaseIndex]);
-  CmpInst::Predicate BasePred =
-      IsCmpOp ? cast<CmpInst>(VL[BaseIndex])->getPredicate()
-              : CmpInst::BAD_ICMP_PREDICATE;
   unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
   unsigned AltOpcode = Opcode;
   unsigned AltIndex = BaseIndex;
@@ -533,57 +514,6 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
         continue;
       }
     }
-    } else if (IsCmpOp && isa<CmpInst>(VL[Cnt])) {
-      auto *BaseInst = cast<Instruction>(VL[BaseIndex]);
-      auto *Inst = cast<Instruction>(VL[Cnt]);
-      Type *Ty0 = BaseInst->getOperand(0)->getType();
-      Type *Ty1 = Inst->getOperand(0)->getType();
-      if (Ty0 == Ty1) {
-        Value *BaseOp0 = BaseInst->getOperand(0);
-        Value *BaseOp1 = BaseInst->getOperand(1);
-        Value *Op0 = Inst->getOperand(0);
-        Value *Op1 = Inst->getOperand(1);
-        CmpInst::Predicate CurrentPred =
-            cast<CmpInst>(VL[Cnt])->getPredicate();
-        CmpInst::Predicate SwappedCurrentPred =
-            CmpInst::getSwappedPredicate(CurrentPred);
-        // Check for compatible operands. If the corresponding operands are not
-        // compatible - need to perform alternate vectorization.
-        if (InstOpcode == Opcode) {
-          if (BasePred == CurrentPred &&
-              areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1))
-            continue;
-          if (BasePred == SwappedCurrentPred &&
-              areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0))
-            continue;
-          if (E == 2 &&
-              (BasePred == CurrentPred || BasePred == SwappedCurrentPred))
-            continue;
-          auto *AltInst = cast<CmpInst>(VL[AltIndex]);
-          CmpInst::Predicate AltPred = AltInst->getPredicate();
-          Value *AltOp0 = AltInst->getOperand(0);
-          Value *AltOp1 = AltInst->getOperand(1);
-          // Check if operands are compatible with alternate operands.
-          if (AltPred == CurrentPred &&
-              areCompatibleCmpOps(AltOp0, AltOp1, Op0, Op1))
-            continue;
-          if (AltPred == SwappedCurrentPred &&
-              areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0))
-            continue;
-        }
-        if (BaseIndex == AltIndex) {
-          assert(isValidForAlternation(Opcode) &&
-                 isValidForAlternation(InstOpcode) &&
-                 "Cast isn't safe for alternation, logic needs to be updated!");
-          AltIndex = Cnt;
-          continue;
-        }
-        auto *AltInst = cast<CmpInst>(VL[AltIndex]);
-        CmpInst::Predicate AltPred = AltInst->getPredicate();
-        if (BasePred == CurrentPred || BasePred == SwappedCurrentPred ||
-            AltPred == CurrentPred || AltPred == SwappedCurrentPred)
-          continue;
-      }
   } else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
     continue;
   return InstructionsState(VL[BaseIndex], nullptr, nullptr);
@@ -4424,41 +4354,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
 
       // Reorder operands if reordering would enable vectorization.
-      auto *CI = dyn_cast<CmpInst>(VL0);
-      if (isa<BinaryOperator>(VL0) || CI) {
+      if (isa<BinaryOperator>(VL0)) {
         ValueList Left, Right;
-        if (!CI || all_of(VL, [](Value *V) {
-              return cast<CmpInst>(V)->isCommutative();
-            })) {
-          reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
-        } else {
-          CmpInst::Predicate P0 = CI->getPredicate();
-          CmpInst::Predicate AltP0 = cast<CmpInst>(S.AltOp)->getPredicate();
-          CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0);
-          Value *BaseOp0 = VL0->getOperand(0);
-          Value *BaseOp1 = VL0->getOperand(1);
-          // Collect operands - commute if it uses the swapped predicate or
-          // alternate operation.
-          for (Value *V : VL) {
-            auto *Cmp = cast<CmpInst>(V);
-            Value *LHS = Cmp->getOperand(0);
-            Value *RHS = Cmp->getOperand(1);
-            CmpInst::Predicate CurrentPred = CI->getPredicate();
-            CmpInst::Predicate CurrentPredSwapped =
-                CmpInst::getSwappedPredicate(CurrentPred);
-            if (P0 == AltP0 || P0 == AltP0Swapped) {
-              if ((P0 == CurrentPred &&
-                   !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) ||
-                  (P0 == CurrentPredSwapped &&
-                   !areCompatibleCmpOps(BaseOp0, BaseOp1, RHS, LHS)))
-                std::swap(LHS, RHS);
-            } else if (!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) {
-              std::swap(LHS, RHS);
-            }
-            Left.push_back(LHS);
-            Right.push_back(RHS);
-          }
-        }
+        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
         TE->setOperand(0, Left);
         TE->setOperand(1, Right);
         buildTree_rec(Left, Depth + 1, {TE, 0});
@@ -5390,8 +5288,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
            ((Instruction::isBinaryOp(E->getOpcode()) &&
              Instruction::isBinaryOp(E->getAltOpcode())) ||
            (Instruction::isCast(E->getOpcode()) &&
-             Instruction::isCast(E->getAltOpcode())) ||
-            (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
+             Instruction::isCast(E->getAltOpcode()))) &&
            "Invalid Shuffle Vector Operand");
     InstructionCost ScalarCost = 0;
     if (NeedToShuffleReuses) {
@@ -5439,14 +5336,6 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
       VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
       VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
-    } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
-      VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
-                                        Builder.getInt1Ty(),
-                                        CI0->getPredicate(), CostKind, VL0);
-      VecCost += TTI->getCmpSelInstrCost(
-          E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
-          cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
-          E->getAltOp());
     } else {
       Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
       Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
@@ -5463,29 +5352,6 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
         E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
         [E](Instruction *I) {
           assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
-          if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
-            auto *AltCI0 = cast<CmpInst>(E->getAltOp());
-            auto *CI = cast<CmpInst>(I);
-            CmpInst::Predicate P0 = CI0->getPredicate();
-            CmpInst::Predicate AltP0 = AltCI0->getPredicate();
-            CmpInst::Predicate AltP0Swapped =
-                CmpInst::getSwappedPredicate(AltP0);
-            CmpInst::Predicate CurrentPred = CI->getPredicate();
-            CmpInst::Predicate CurrentPredSwapped =
-                CmpInst::getSwappedPredicate(CurrentPred);
-            if (P0 == AltP0 || P0 == AltP0Swapped) {
-              // Alternate cmps have same/swapped predicate as main cmps but
-              // different order of compatible operands.
-              return !(
-                  (P0 == CurrentPred &&
-                   areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
-                                       I->getOperand(0), I->getOperand(1))) ||
-                  (P0 == CurrentPredSwapped &&
-                   areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
-                                       I->getOperand(1), I->getOperand(0))));
-            }
-            return CurrentPred != P0 && CurrentPredSwapped != P0;
-          }
           return I->getOpcode() == E->getAltOpcode();
         },
         Mask);
@@ -6968,12 +6834,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
           ((Instruction::isBinaryOp(E->getOpcode()) &&
             Instruction::isBinaryOp(E->getAltOpcode())) ||
            (Instruction::isCast(E->getOpcode()) &&
-            Instruction::isCast(E->getAltOpcode())) ||
-           (isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
+            Instruction::isCast(E->getAltOpcode()))) &&
           "Invalid Shuffle Vector Operand");
 
     Value *LHS = nullptr, *RHS = nullptr;
-    if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) {
+    if (Instruction::isBinaryOp(E->getOpcode())) {
       setInsertPointAfterBundle(E);
       LHS = vectorizeTree(E->getOperand(0));
       RHS = vectorizeTree(E->getOperand(1));
@@ -6993,15 +6858,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
           static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
       V1 = Builder.CreateBinOp(
           static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
-    } else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
-      V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS);
-      auto *AltCI = cast<CmpInst>(E->getAltOp());
-      CmpInst::Predicate AltPred = AltCI->getPredicate();
-      unsigned AltIdx =
-          std::distance(E->Scalars.begin(), find(E->Scalars, AltCI));
-      if (AltCI->getOperand(0) != E->getOperand(0)[AltIdx])
-        AltPred = CmpInst::getSwappedPredicate(AltPred);
-      V1 = Builder.CreateCmp(AltPred, LHS, RHS);
     } else {
       V0 = Builder.CreateCast(
           static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
@@ -7026,29 +6882,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
         [E](Instruction *I) {
           assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
-          if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
-            auto *AltCI0 = cast<CmpInst>(E->getAltOp());
-            auto *CI = cast<CmpInst>(I);
-            CmpInst::Predicate P0 = CI0->getPredicate();
-            CmpInst::Predicate AltP0 = AltCI0->getPredicate();
-            CmpInst::Predicate AltP0Swapped =
-                CmpInst::getSwappedPredicate(AltP0);
-            CmpInst::Predicate CurrentPred = CI->getPredicate();
-            CmpInst::Predicate CurrentPredSwapped =
-                CmpInst::getSwappedPredicate(CurrentPred);
-            if (P0 == AltP0 || P0 == AltP0Swapped) {
-              // Alternate cmps have same/swapped predicate as main cmps but
-              // different order of compatible operands.
-              return !(
-                  (P0 == CurrentPred &&
-                   areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
-                                       I->getOperand(0), I->getOperand(1))) ||
-                  (P0 == CurrentPredSwapped &&
-                   areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
-                                       I->getOperand(1), I->getOperand(0))));
-            }
-            return CurrentPred != P0 && CurrentPredSwapped != P0;
-          }
           return I->getOpcode() == E->getAltOpcode();
         },
         Mask, &OpScalars, &AltScalars);
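The LoopVectorize.cpp hunks above first try to pin vscale down from the function's vscale_range attribute (only when min == max) and fall back to TTI.getVScaleForTuning() otherwise, then use that value to translate a scalable main-loop VF into the number of lanes it actually covers per iteration. A minimal self-contained sketch of that logic follows, assuming LLVM 14 era headers; assumedVScale and estimatedLanes are illustrative names, not APIs from this patch.

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

// Prefer an exact vscale_range (min == max) on the function; otherwise ask
// the target for its tuning value, which may be None.
static Optional<unsigned> assumedVScale(const Function &F,
                                        const TargetTransformInfo &TTI) {
  if (F.hasFnAttribute(Attribute::VScaleRange)) {
    Attribute Attr = F.getFnAttribute(Attribute::VScaleRange);
    unsigned Min = Attr.getVScaleRangeMin();
    Optional<unsigned> Max = Attr.getVScaleRangeMax();
    if (Max && Min == *Max)
      return Max;
  }
  return TTI.getVScaleForTuning();
}

// Translate a scalable main-loop VF into the lanes it covers per iteration:
// e.g. vscale x 2 with vscale == 4 covers 8 lanes, which is why a fixed
// VF=4 epilogue can still be worth considering.
static ElementCount estimatedLanes(ElementCount MainLoopVF, const Function &F,
                                   const TargetTransformInfo &TTI) {
  if (!MainLoopVF.isScalable())
    return MainLoopVF;
  ElementCount EC = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
  if (Optional<unsigned> VScale = assumedVScale(F, TTI))
    EC *= *VScale;
  return EC;
}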

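The AAUndefinedBehavior hunk reworks the return-value inspection into a per-instruction callback, but the rule it enforces is unchanged and matches the two cases documented in the comments. A simplified sketch of that predicate, under the patch's stated precondition that the return position is known noundef; the real code additionally routes the value through the Attributor to skip assumed-dead positions and simplified values, and returnTriggersUB is an illustrative helper, not an LLVM API.

#include "llvm/IR/Constants.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Precondition, as in the patch: the return position is known noundef.
static bool returnTriggersUB(const Value *RetVal, bool RetPosKnownNonNull) {
  if (!RetVal)
    return false;
  // (1) Returning undef from a noundef position is immediate UB.
  if (isa<UndefValue>(RetVal))
    return true;
  // (2) Returning null from a noundef + nonnull position returns poison.
  return RetPosKnownNonNull && isa<ConstantPointerNull>(RetVal);
}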
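Several hunks above (AttributorAttributes.cpp, InferAddressSpaces.cpp) swap CreateBitOrPointerCast for the addrspace-aware variant. The motivation is that a plain bitcast between pointers in different address spaces is invalid IR; CreatePointerBitCastOrAddrSpaceCast emits an addrspacecast when the address spaces differ and a bitcast otherwise. A hedged sketch of the pattern, with castToResultTy as an illustrative wrapper rather than anything from the patch:

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

static Value *castToResultTy(IRBuilder<> &IRB, Value *Ptr, Type *ResTy) {
  // Emits a bitcast when the address spaces match and an addrspacecast when
  // they do not; a plain bitcast across address spaces would be rejected by
  // the IR verifier.
  return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, ResTy,
                                                 Ptr->getName() + ".cast");
}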