diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2024-08-25 11:12:58 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2024-10-23 18:27:07 +0000 |
| commit | 62987288060ff68c817b7056815aa9fb8ba8ecd7 (patch) | |
| tree | d7953c9488f71e79d26c924169988f16e9ba9cee /contrib/llvm-project/llvm/lib | |
| parent | 52418fc2be8efa5172b90a3a9e617017173612c4 (diff) | |
| parent | adf62863f35c84e4c5708f3dc5a1589ce958a238 (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib')
27 files changed, 427 insertions, 667 deletions
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 84214c47a10e..f3fc69c86cd1 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -728,11 +728,6 @@ public: MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; } - const DenseMap<Value *, SmallVector<const Value *, 16>> & - getUnderlyingObjects() { - return UnderlyingObjects; - } - private: typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap; @@ -1459,22 +1454,23 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, } /// Check whether the access through \p Ptr has a constant stride. -std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE, - Type *AccessTy, Value *Ptr, - const Loop *Lp, - const DenseMap<Value *, const SCEV *> &StridesMap, - bool Assume, bool ShouldCheckWrap) { +std::optional<int64_t> +llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, + const Loop *Lp, + const DenseMap<Value *, const SCEV *> &StridesMap, + bool Assume, bool ShouldCheckWrap) { + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); + if (PSE.getSE()->isLoopInvariant(PtrScev, Lp)) + return {0}; + Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); - if (isa<ScalableVectorType>(AccessTy)) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy << "\n"); return std::nullopt; } - const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev); if (Assume && !AR) AR = PSE.getAsAddRec(Ptr); @@ -1899,24 +1895,12 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, return ScaledDist % Stride; } -/// Returns true if any of the underlying objects has a loop varying address, -/// i.e. may change in \p L. -static bool -isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects, - ScalarEvolution &SE, const Loop *L) { - return any_of(UnderlyingObjects, [&SE, L](const Value *UO) { - return !SE.isLoopInvariant(SE.getSCEV(const_cast<Value *>(UO)), L); - }); -} - std::variant<MemoryDepChecker::Dependence::DepType, MemoryDepChecker::DepDistanceStrideAndSizeInfo> MemoryDepChecker::getDependenceDistanceStrideAndSize( const AccessAnalysis::MemAccessInfo &A, Instruction *AInst, - const AccessAnalysis::MemAccessInfo &B, Instruction *BInst, - const DenseMap<Value *, SmallVector<const Value *, 16>> - &UnderlyingObjects) { - auto &DL = InnermostLoop->getHeader()->getDataLayout(); + const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) { + const auto &DL = InnermostLoop->getHeader()->getDataLayout(); auto &SE = *PSE.getSE(); auto [APtr, AIsWrite] = A; auto [BPtr, BIsWrite] = B; @@ -1933,12 +1917,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( BPtr->getType()->getPointerAddressSpace()) return MemoryDepChecker::Dependence::Unknown; - int64_t StrideAPtr = - getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true) - .value_or(0); - int64_t StrideBPtr = - getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true) - .value_or(0); + std::optional<int64_t> StrideAPtr = + getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true); + std::optional<int64_t> StrideBPtr = + getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true); const SCEV *Src = PSE.getSCEV(APtr); const SCEV *Sink = PSE.getSCEV(BPtr); @@ -1946,26 +1928,19 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( // If the induction step is negative we have to invert source and sink of the // dependence when measuring the distance between them. We should not swap // AIsWrite with BIsWrite, as their uses expect them in program order. - if (StrideAPtr < 0) { + if (StrideAPtr && *StrideAPtr < 0) { std::swap(Src, Sink); std::swap(AInst, BInst); + std::swap(StrideAPtr, StrideBPtr); } const SCEV *Dist = SE.getMinusSCEV(Sink, Src); LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink - << "(Induction step: " << StrideAPtr << ")\n"); + << "\n"); LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst << ": " << *Dist << "\n"); - // Needs accesses where the addresses of the accessed underlying objects do - // not change within the loop. - if (isLoopVariantIndirectAddress(UnderlyingObjects.find(APtr)->second, SE, - InnermostLoop) || - isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE, - InnermostLoop)) - return MemoryDepChecker::Dependence::IndirectUnsafe; - // Check if we can prove that Sink only accesses memory after Src's end or // vice versa. At the moment this is limited to cases where either source or // sink are loop invariant to avoid compile-time increases. This is not @@ -1987,12 +1962,33 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( } } - // Need accesses with constant strides and the same direction. We don't want - // to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that - // could wrap in the address space. - if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) || - (StrideAPtr < 0 && StrideBPtr > 0)) { + // Need accesses with constant strides and the same direction for further + // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and + // similar code or pointer arithmetic that could wrap in the address space. + + // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and + // not loop-invariant (stride will be 0 in that case), we cannot analyze the + // dependence further and also cannot generate runtime checks. + if (!StrideAPtr || !StrideBPtr) { LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n"); + return MemoryDepChecker::Dependence::IndirectUnsafe; + } + + int64_t StrideAPtrInt = *StrideAPtr; + int64_t StrideBPtrInt = *StrideBPtr; + LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt + << " Sink induction step: " << StrideBPtrInt << "\n"); + // At least Src or Sink are loop invariant and the other is strided or + // invariant. We can generate a runtime check to disambiguate the accesses. + if (StrideAPtrInt == 0 || StrideBPtrInt == 0) + return MemoryDepChecker::Dependence::Unknown; + + // Both Src and Sink have a constant stride, check if they are in the same + // direction. + if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) || + (StrideAPtrInt < 0 && StrideBPtrInt > 0)) { + LLVM_DEBUG( + dbgs() << "Pointer access with strides in different directions\n"); return MemoryDepChecker::Dependence::Unknown; } @@ -2001,22 +1997,20 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy); if (!HasSameSize) TypeByteSize = 0; - return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr), - std::abs(StrideBPtr), TypeByteSize, + return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt), + std::abs(StrideBPtrInt), TypeByteSize, AIsWrite, BIsWrite); } -MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( - const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, - unsigned BIdx, - const DenseMap<Value *, SmallVector<const Value *, 16>> - &UnderlyingObjects) { +MemoryDepChecker::Dependence::DepType +MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, + const MemAccessInfo &B, unsigned BIdx) { assert(AIdx < BIdx && "Must pass arguments in program order"); // Get the dependence distance, stride, type size and what access writes for // the dependence between A and B. - auto Res = getDependenceDistanceStrideAndSize( - A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects); + auto Res = + getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]); if (std::holds_alternative<Dependence::DepType>(Res)) return std::get<Dependence::DepType>(Res); @@ -2250,10 +2244,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( return Dependence::BackwardVectorizable; } -bool MemoryDepChecker::areDepsSafe( - DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, - const DenseMap<Value *, SmallVector<const Value *, 16>> - &UnderlyingObjects) { +bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets, + const MemAccessInfoList &CheckDeps) { MinDepDistBytes = -1; SmallPtrSet<MemAccessInfo, 8> Visited; @@ -2296,8 +2288,8 @@ bool MemoryDepChecker::areDepsSafe( if (*I1 > *I2) std::swap(A, B); - Dependence::DepType Type = isDependent(*A.first, A.second, *B.first, - B.second, UnderlyingObjects); + Dependence::DepType Type = + isDependent(*A.first, A.second, *B.first, B.second); mergeInStatus(Dependence::isSafeForVectorization(Type)); // Gather dependences unless we accumulated MaxDependences @@ -2652,8 +2644,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, if (Accesses.isDependencyCheckNeeded()) { LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses, - Accesses.getDependenciesToCheck(), - Accesses.getUnderlyingObjects()); + Accesses.getDependenciesToCheck()); if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) { LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); diff --git a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index e1cb63a9ab8f..0d7eb7da8d6b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -822,16 +822,16 @@ MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) { AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) { AAMDNodes New = *this; MDNode *M = New.TBAAStruct; - if (M && M->getNumOperands() >= 3 && M->getOperand(0) && + if (!New.TBAA && M && M->getNumOperands() >= 3 && M->getOperand(0) && mdconst::hasa<ConstantInt>(M->getOperand(0)) && mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() && M->getOperand(1) && mdconst::hasa<ConstantInt>(M->getOperand(1)) && mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() == AccessSize && - M->getOperand(2) && isa<MDNode>(M->getOperand(2))) { - New.TBAAStruct = nullptr; + M->getOperand(2) && isa<MDNode>(M->getOperand(2))) New.TBAA = cast<MDNode>(M->getOperand(2)); - } + + New.TBAAStruct = nullptr; return New; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 68a8a273a1b4..eb010afd41b6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3889,6 +3889,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { F.getSubprogram(), &F.getEntryBlock()); R << "unable to translate in big endian mode"; reportTranslationError(*MF, *TPC, *ORE, R); + return false; } // Release the per-function state when we return, whether we succeeded or not. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp index cd5d877e53d8..f4490873cfdc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -341,6 +341,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { << ore::NV("Function", MF.getFunction().getName()) << "'"; }); + // Emit any remarks implemented for the target, based on final frame layout. + TFI->emitRemarks(MF, ORE); + delete RS; SaveBlocks.clear(); RestoreBlocks.clear(); diff --git a/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenData.cpp b/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenData.cpp deleted file mode 100644 index 49b744744095..000000000000 --- a/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenData.cpp +++ /dev/null @@ -1,196 +0,0 @@ -//===-- CodeGenData.cpp ---------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains support for codegen data that has stable summary which -// can be used to optimize the code in the subsequent codegen. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Bitcode/BitcodeWriter.h" -#include "llvm/CodeGenData/CodeGenDataReader.h" -#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/WithColor.h" - -#define DEBUG_TYPE "cg-data" - -using namespace llvm; -using namespace cgdata; - -static std::string getCGDataErrString(cgdata_error Err, - const std::string &ErrMsg = "") { - std::string Msg; - raw_string_ostream OS(Msg); - - switch (Err) { - case cgdata_error::success: - OS << "success"; - break; - case cgdata_error::eof: - OS << "end of File"; - break; - case cgdata_error::bad_magic: - OS << "invalid codegen data (bad magic)"; - break; - case cgdata_error::bad_header: - OS << "invalid codegen data (file header is corrupt)"; - break; - case cgdata_error::empty_cgdata: - OS << "empty codegen data"; - break; - case cgdata_error::malformed: - OS << "malformed codegen data"; - break; - case cgdata_error::unsupported_version: - OS << "unsupported codegen data version"; - break; - } - - // If optional error message is not empty, append it to the message. - if (!ErrMsg.empty()) - OS << ": " << ErrMsg; - - return OS.str(); -} - -namespace { - -// FIXME: This class is only here to support the transition to llvm::Error. It -// will be removed once this transition is complete. Clients should prefer to -// deal with the Error value directly, rather than converting to error_code. -class CGDataErrorCategoryType : public std::error_category { - const char *name() const noexcept override { return "llvm.cgdata"; } - - std::string message(int IE) const override { - return getCGDataErrString(static_cast<cgdata_error>(IE)); - } -}; - -} // end anonymous namespace - -const std::error_category &llvm::cgdata_category() { - static CGDataErrorCategoryType ErrorCategory; - return ErrorCategory; -} - -std::string CGDataError::message() const { - return getCGDataErrString(Err, Msg); -} - -char CGDataError::ID = 0; - -namespace { - -const char *CodeGenDataSectNameCommon[] = { -#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ - SectNameCommon, -#include "llvm/CodeGenData/CodeGenData.inc" -}; - -const char *CodeGenDataSectNameCoff[] = { -#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ - SectNameCoff, -#include "llvm/CodeGenData/CodeGenData.inc" -}; - -const char *CodeGenDataSectNamePrefix[] = { -#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix, -#include "llvm/CodeGenData/CodeGenData.inc" -}; - -} // namespace - -namespace llvm { - -std::string getCodeGenDataSectionName(CGDataSectKind CGSK, - Triple::ObjectFormatType OF, - bool AddSegmentInfo) { - std::string SectName; - - if (OF == Triple::MachO && AddSegmentInfo) - SectName = CodeGenDataSectNamePrefix[CGSK]; - - if (OF == Triple::COFF) - SectName += CodeGenDataSectNameCoff[CGSK]; - else - SectName += CodeGenDataSectNameCommon[CGSK]; - - return SectName; -} - -std::unique_ptr<CodeGenData> CodeGenData::Instance = nullptr; -std::once_flag CodeGenData::OnceFlag; - -CodeGenData &CodeGenData::getInstance() { - std::call_once(CodeGenData::OnceFlag, []() { - Instance = std::unique_ptr<CodeGenData>(new CodeGenData()); - - // TODO: Initialize writer or reader mode for the client optimization. - }); - return *(Instance.get()); -} - -namespace IndexedCGData { - -Expected<Header> Header::readFromBuffer(const unsigned char *Curr) { - using namespace support; - - static_assert(std::is_standard_layout_v<llvm::IndexedCGData::Header>, - "The header should be standard layout type since we use offset " - "of fields to read."); - Header H; - H.Magic = endian::readNext<uint64_t, endianness::little, unaligned>(Curr); - if (H.Magic != IndexedCGData::Magic) - return make_error<CGDataError>(cgdata_error::bad_magic); - H.Version = endian::readNext<uint32_t, endianness::little, unaligned>(Curr); - if (H.Version > IndexedCGData::CGDataVersion::CurrentVersion) - return make_error<CGDataError>(cgdata_error::unsupported_version); - H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr); - - switch (H.Version) { - // When a new field is added to the header add a case statement here to - // compute the size as offset of the new field + size of the new field. This - // relies on the field being added to the end of the list. - static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1, - "Please update the size computation below if a new field has " - "been added to the header, if not add a case statement to " - "fall through to the latest version."); - case 1ull: - H.OutlinedHashTreeOffset = - endian::readNext<uint64_t, endianness::little, unaligned>(Curr); - } - - return H; -} - -} // end namespace IndexedCGData - -namespace cgdata { - -void warn(Twine Message, std::string Whence, std::string Hint) { - WithColor::warning(); - if (!Whence.empty()) - errs() << Whence << ": "; - errs() << Message << "\n"; - if (!Hint.empty()) - WithColor::note() << Hint << "\n"; -} - -void warn(Error E, StringRef Whence) { - if (E.isA<CGDataError>()) { - handleAllErrors(std::move(E), [&](const CGDataError &IPE) { - warn(IPE.message(), Whence.str(), ""); - }); - } -} - -} // end namespace cgdata - -} // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataReader.cpp b/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataReader.cpp deleted file mode 100644 index bcd61047079f..000000000000 --- a/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataReader.cpp +++ /dev/null @@ -1,175 +0,0 @@ -//===- CodeGenDataReader.cpp ----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains support for reading codegen data. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGenData/CodeGenDataReader.h" -#include "llvm/CodeGenData/OutlinedHashTreeRecord.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/MemoryBuffer.h" - -#define DEBUG_TYPE "cg-data-reader" - -using namespace llvm; - -namespace llvm { - -static Expected<std::unique_ptr<MemoryBuffer>> -setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { - auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() - : FS.getBufferForFile(Filename); - if (std::error_code EC = BufferOrErr.getError()) - return errorCodeToError(EC); - return std::move(BufferOrErr.get()); -} - -Error CodeGenDataReader::mergeFromObjectFile( - const object::ObjectFile *Obj, - OutlinedHashTreeRecord &GlobalOutlineRecord) { - Triple TT = Obj->makeTriple(); - auto CGOutLineName = - getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false); - - for (auto &Section : Obj->sections()) { - Expected<StringRef> NameOrErr = Section.getName(); - if (!NameOrErr) - return NameOrErr.takeError(); - Expected<StringRef> ContentsOrErr = Section.getContents(); - if (!ContentsOrErr) - return ContentsOrErr.takeError(); - auto *Data = reinterpret_cast<const unsigned char *>(ContentsOrErr->data()); - auto *EndData = Data + ContentsOrErr->size(); - - if (*NameOrErr == CGOutLineName) { - // In case dealing with an executable that has concatenated cgdata, - // we want to merge them into a single cgdata. - // Although it's not a typical workflow, we support this scenario. - while (Data != EndData) { - OutlinedHashTreeRecord LocalOutlineRecord; - LocalOutlineRecord.deserialize(Data); - GlobalOutlineRecord.merge(LocalOutlineRecord); - } - } - // TODO: Add support for other cgdata sections. - } - - return Error::success(); -} - -Error IndexedCodeGenDataReader::read() { - using namespace support; - - // The smallest header with the version 1 is 24 bytes - const unsigned MinHeaderSize = 24; - if (DataBuffer->getBufferSize() < MinHeaderSize) - return error(cgdata_error::bad_header); - - auto *Start = - reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart()); - auto *End = - reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd()); - if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header)) - return E; - - if (hasOutlinedHashTree()) { - const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset; - if (Ptr >= End) - return error(cgdata_error::eof); - HashTreeRecord.deserialize(Ptr); - } - - return success(); -} - -Expected<std::unique_ptr<CodeGenDataReader>> -CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) { - // Set up the buffer to read. - auto BufferOrError = setupMemoryBuffer(Path, FS); - if (Error E = BufferOrError.takeError()) - return std::move(E); - return CodeGenDataReader::create(std::move(BufferOrError.get())); -} - -Expected<std::unique_ptr<CodeGenDataReader>> -CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) { - if (Buffer->getBufferSize() == 0) - return make_error<CGDataError>(cgdata_error::empty_cgdata); - - std::unique_ptr<CodeGenDataReader> Reader; - // Create the reader. - if (IndexedCodeGenDataReader::hasFormat(*Buffer)) - Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer)); - else if (TextCodeGenDataReader::hasFormat(*Buffer)) - Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer)); - else - return make_error<CGDataError>(cgdata_error::malformed); - - // Initialize the reader and return the result. - if (Error E = Reader->read()) - return std::move(E); - - return std::move(Reader); -} - -bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) { - using namespace support; - if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic)) - return false; - - uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>( - DataBuffer.getBufferStart()); - // Verify that it's magical. - return Magic == IndexedCGData::Magic; -} - -bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) { - // Verify that this really looks like plain ASCII text by checking a - // 'reasonable' number of characters (up to the magic size). - StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t)); - return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); }); -} -Error TextCodeGenDataReader::read() { - using namespace support; - - // Parse the custom header line by line. - for (; !Line.is_at_eof(); ++Line) { - // Skip empty or whitespace-only lines - if (Line->trim().empty()) - continue; - - if (!Line->starts_with(":")) - break; - StringRef Str = Line->drop_front().rtrim(); - if (Str.equals_insensitive("outlined_hash_tree")) - DataKind |= CGDataKind::FunctionOutlinedHashTree; - else - return error(cgdata_error::bad_header); - } - - // We treat an empty header (that is a comment # only) as a valid header. - if (Line.is_at_eof()) { - if (DataKind == CGDataKind::Unknown) - return Error::success(); - return error(cgdata_error::bad_header); - } - - // The YAML docs follow after the header. - const char *Pos = Line->data(); - size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) - - reinterpret_cast<size_t>(Pos); - yaml::Input YOS(StringRef(Pos, Size)); - if (hasOutlinedHashTree()) - HashTreeRecord.deserializeYAML(YOS); - - // TODO: Add more yaml cgdata in order - - return Error::success(); -} -} // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataWriter.cpp b/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataWriter.cpp deleted file mode 100644 index 3c91a1b30345..000000000000 --- a/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataWriter.cpp +++ /dev/null @@ -1,162 +0,0 @@ -//===- CodeGenDataWriter.cpp ----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing codegen data. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGenData/CodeGenDataWriter.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/EndianStream.h" - -#define DEBUG_TYPE "cg-data-writer" - -using namespace llvm; - -namespace llvm { - -/// A struct to define how the data stream should be patched. -struct CGDataPatchItem { - uint64_t Pos; // Where to patch. - uint64_t *D; // Pointer to an array of source data. - int N; // Number of elements in \c D array. -}; - -// A wrapper class to abstract writer stream with support of bytes -// back patching. -class CGDataOStream { -public: - CGDataOStream(raw_fd_ostream &FD) - : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {} - CGDataOStream(raw_string_ostream &STR) - : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {} - - uint64_t tell() { return OS.tell(); } - void write(uint64_t V) { LE.write<uint64_t>(V); } - void write32(uint32_t V) { LE.write<uint32_t>(V); } - void write8(uint8_t V) { LE.write<uint8_t>(V); } - - // \c patch can only be called when all data is written and flushed. - // For raw_string_ostream, the patch is done on the target string - // directly and it won't be reflected in the stream's internal buffer. - void patch(ArrayRef<CGDataPatchItem> P) { - using namespace support; - - if (IsFDOStream) { - raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS); - const uint64_t LastPos = FDOStream.tell(); - for (const auto &K : P) { - FDOStream.seek(K.Pos); - for (int I = 0; I < K.N; I++) - write(K.D[I]); - } - // Reset the stream to the last position after patching so that users - // don't accidentally overwrite data. This makes it consistent with - // the string stream below which replaces the data directly. - FDOStream.seek(LastPos); - } else { - raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS); - std::string &Data = SOStream.str(); // with flush - for (const auto &K : P) { - for (int I = 0; I < K.N; I++) { - uint64_t Bytes = - endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]); - Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t), - (const char *)&Bytes, sizeof(uint64_t)); - } - } - } - } - - // If \c OS is an instance of \c raw_fd_ostream, this field will be - // true. Otherwise, \c OS will be an raw_string_ostream. - bool IsFDOStream; - raw_ostream &OS; - support::endian::Writer LE; -}; - -} // end namespace llvm - -void CodeGenDataWriter::addRecord(OutlinedHashTreeRecord &Record) { - assert(Record.HashTree && "empty hash tree in the record"); - HashTreeRecord.HashTree = std::move(Record.HashTree); - - DataKind |= CGDataKind::FunctionOutlinedHashTree; -} - -Error CodeGenDataWriter::write(raw_fd_ostream &OS) { - CGDataOStream COS(OS); - return writeImpl(COS); -} - -Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) { - using namespace support; - IndexedCGData::Header Header; - Header.Magic = IndexedCGData::Magic; - Header.Version = IndexedCGData::Version; - - // Set the CGDataKind depending on the kind. - Header.DataKind = 0; - if (static_cast<bool>(DataKind & CGDataKind::FunctionOutlinedHashTree)) - Header.DataKind |= - static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree); - - Header.OutlinedHashTreeOffset = 0; - - // Only write up to the CGDataKind. We need to remember the offset of the - // remaining fields to allow back-patching later. - COS.write(Header.Magic); - COS.write32(Header.Version); - COS.write32(Header.DataKind); - - // Save the location of Header.OutlinedHashTreeOffset field in \c COS. - OutlinedHashTreeOffset = COS.tell(); - - // Reserve the space for OutlinedHashTreeOffset field. - COS.write(0); - - return Error::success(); -} - -Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) { - if (Error E = writeHeader(COS)) - return E; - - uint64_t OutlinedHashTreeFieldStart = COS.tell(); - if (hasOutlinedHashTree()) - HashTreeRecord.serialize(COS.OS); - - // Back patch the offsets. - CGDataPatchItem PatchItems[] = { - {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}}; - COS.patch(PatchItems); - - return Error::success(); -} - -Error CodeGenDataWriter::writeHeaderText(raw_fd_ostream &OS) { - if (hasOutlinedHashTree()) - OS << "# Outlined stable hash tree\n:outlined_hash_tree\n"; - - // TODO: Add more data types in this header - - return Error::success(); -} - -Error CodeGenDataWriter::writeText(raw_fd_ostream &OS) { - if (Error E = writeHeaderText(OS)) - return E; - - yaml::Output YOS(OS); - if (hasOutlinedHashTree()) - HashTreeRecord.serializeYAML(YOS); - - // TODO: Write more yaml cgdata in order - - return Error::success(); -} diff --git a/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp index 9c44eff7953a..01aaedcf7d54 100644 --- a/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp +++ b/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp @@ -12,7 +12,6 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/ADT/MapVector.h" -#include "llvm/Demangle/Demangle.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LLVMContext.h" @@ -1416,8 +1415,7 @@ bool FPPassManager::runOnFunction(Function &F) { // Store name outside of loop to avoid redundant calls. const StringRef Name = F.getName(); - llvm::TimeTraceScope FunctionScope( - "OptFunction", [&F]() { return demangle(F.getName().str()); }); + llvm::TimeTraceScope FunctionScope("OptFunction", Name); for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { FunctionPass *FP = getContainedPass(Index); diff --git a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp index fc7b82d522bf..4eff2deef9ab 100644 --- a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp @@ -23,7 +23,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineVerifier.h" -#include "llvm/Demangle/Demangle.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -236,12 +235,12 @@ void printIR(raw_ostream &OS, const MachineFunction *MF) { MF->print(OS); } -std::string getIRName(Any IR, bool demangled = false) { +std::string getIRName(Any IR) { if (unwrapIR<Module>(IR)) return "[module]"; if (const auto *F = unwrapIR<Function>(IR)) - return demangled ? demangle(F->getName()) : F->getName().str(); + return F->getName().str(); if (const auto *C = unwrapIR<LazyCallGraph::SCC>(IR)) return C->getName(); @@ -251,7 +250,7 @@ std::string getIRName(Any IR, bool demangled = false) { L->getHeader()->getParent()->getName().str(); if (const auto *MF = unwrapIR<MachineFunction>(IR)) - return demangled ? demangle(MF->getName()) : MF->getName().str(); + return MF->getName().str(); llvm_unreachable("Unknown wrapped IR type"); } @@ -1589,7 +1588,7 @@ void TimeProfilingPassesHandler::registerCallbacks( } void TimeProfilingPassesHandler::runBeforePass(StringRef PassID, Any IR) { - timeTraceProfilerBegin(PassID, getIRName(IR, true)); + timeTraceProfilerBegin(PassID, getIRName(IR)); } void TimeProfilingPassesHandler::runAfterPass() { timeTraceProfilerEnd(); } diff --git a/contrib/llvm-project/llvm/lib/Support/regcomp.c b/contrib/llvm-project/llvm/lib/Support/regcomp.c index 990aef32a396..daa41eb4912e 100644 --- a/contrib/llvm-project/llvm/lib/Support/regcomp.c +++ b/contrib/llvm-project/llvm/lib/Support/regcomp.c @@ -278,7 +278,7 @@ static char nuls[10]; /* place to point scanner in event of error */ #else #define DUPMAX 255 #endif -#define INFINITY (DUPMAX + 1) +#define REGINFINITY (DUPMAX + 1) #ifndef NDEBUG static int never = 0; /* for use in asserts; shuts lint up */ @@ -582,7 +582,7 @@ p_ere_exp(struct parse *p) count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ - count2 = INFINITY; + count2 = REGINFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); @@ -753,7 +753,7 @@ p_simp_re(struct parse *p, count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ - count2 = INFINITY; + count2 = REGINFINITY; } else /* just a single number */ count2 = count; repeat(p, pos, count, count2); @@ -1115,7 +1115,7 @@ repeat(struct parse *p, # define N 2 # define INF 3 # define REP(f, t) ((f)*8 + (t)) -# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) +# define MAP(n) (((n) <= 1) ? (n) : ((n) == REGINFINITY) ? INF : N) sopno copy; if (p->error != 0) /* head off possible runaway recursion */ diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 3c9b07ad45bf..c64454cc253c 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1292,6 +1292,13 @@ void AArch64AsmPrinter::emitGlobalAlias(const Module &M, StringRef ExpStr = cast<MDString>(Node->getOperand(0))->getString(); MCSymbol *ExpSym = MMI->getContext().getOrCreateSymbol(ExpStr); MCSymbol *Sym = MMI->getContext().getOrCreateSymbol(GA.getName()); + + OutStreamer->beginCOFFSymbolDef(ExpSym); + OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); + OutStreamer->endCOFFSymbolDef(); + OutStreamer->beginCOFFSymbolDef(Sym); OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 2bc14f9821e6..161cf24dd403 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -108,6 +108,10 @@ static bool atomicReadDroppedOnZero(unsigned Opcode) { case AArch64::LDUMINW: case AArch64::LDUMINX: case AArch64::LDUMINLB: case AArch64::LDUMINLH: case AArch64::LDUMINLW: case AArch64::LDUMINLX: + case AArch64::SWPB: case AArch64::SWPH: + case AArch64::SWPW: case AArch64::SWPX: + case AArch64::SWPLB: case AArch64::SWPLH: + case AArch64::SWPLW: case AArch64::SWPLX: return true; } return false; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Features.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Features.td index a1ae0873fc19..a4f8f8c2d962 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Features.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Features.td @@ -438,6 +438,15 @@ def FeatureSVE2p1: ExtensionWithMArch<"sve2p1", "SVE2p1", "FEAT_SVE2p1", def FeatureB16B16 : ExtensionWithMArch<"b16b16", "B16B16", "FEAT_SVE_B16B16", "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions", [FeatureBF16]>; +// FeatureSVEB16B16 and FeatureSMEB16B16 act as aliases for {FeatureB16B16}, and +// {FeatureB16B16, FeatureSME2} respectively. This allows LLVM-20 interfacing programs +// that use '+sve-b16b16' and '+sme-b16b16' to compile in LLVM-19. +def FeatureSVEB16B16 : ExtensionWithMArch<"sve-b16b16", "SVEB16B16", "FEAT_SVE_B16B16", + "Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions", [FeatureB16B16]>; + +def FeatureSMEB16B16 : ExtensionWithMArch<"sme-b16b16", "SMEB16B16", "FEAT_SME_B16B16", + "Enable SME2.1 ZA-targeting non-widening BFloat16 instructions", [FeatureSME2, FeatureB16B16]>; + def FeatureSMEF16F16 : ExtensionWithMArch<"sme-f16f16", "SMEF16F16", "FEAT_SME_F16F16", "Enable SME non-widening Float16 instructions", [FeatureSME2]>; @@ -778,27 +787,26 @@ def HasV8_2aOps : Architecture64<8, 2, "a", "v8.2a", [HasV8_1aOps, FeaturePsUAO, FeaturePAN_RWV, FeatureRAS, FeatureCCPP], !listconcat(HasV8_1aOps.DefaultExts, [FeatureRAS])>; def HasV8_3aOps : Architecture64<8, 3, "a", "v8.3a", - [HasV8_2aOps, FeatureRCPC, FeaturePAuth, FeatureJS, FeatureCCIDX, - FeatureComplxNum], + [HasV8_2aOps, FeatureRCPC, FeaturePAuth, FeatureJS, FeatureComplxNum], !listconcat(HasV8_2aOps.DefaultExts, [FeatureComplxNum, FeatureJS, - FeaturePAuth, FeatureRCPC])>; + FeaturePAuth, FeatureRCPC, FeatureCCIDX])>; def HasV8_4aOps : Architecture64<8, 4, "a", "v8.4a", [HasV8_3aOps, FeatureDotProd, FeatureNV, FeatureMPAM, FeatureDIT, FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI, FeatureFlagM, FeatureRCPC_IMMO, FeatureLSE2], - !listconcat(HasV8_3aOps.DefaultExts, [FeatureDotProd])>; + !listconcat(HasV8_3aOps.DefaultExts, [FeatureDotProd, FeatureDIT, FeatureFlagM])>; def HasV8_5aOps : Architecture64<8, 5, "a", "v8.5a", [HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict, - FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist, + FeatureSB, FeaturePredRes, FeatureCacheDeepPersist, FeatureBranchTargetId], - !listconcat(HasV8_4aOps.DefaultExts, [])>; + !listconcat(HasV8_4aOps.DefaultExts, [FeaturePredRes, FeatureSSBS, FeatureBranchTargetId, FeatureSB])>; def HasV8_6aOps : Architecture64<8, 6, "a", "v8.6a", [HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps, FeatureEnhancedCounterVirtualization, FeatureMatMulInt8], !listconcat(HasV8_5aOps.DefaultExts, [FeatureBF16, FeatureMatMulInt8])>; def HasV8_7aOps : Architecture64<8, 7, "a", "v8.7a", [HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX], - !listconcat(HasV8_6aOps.DefaultExts, [])>; + !listconcat(HasV8_6aOps.DefaultExts, [FeatureWFxT])>; def HasV8_8aOps : Architecture64<8, 8, "a", "v8.8a", [HasV8_7aOps, FeatureHBC, FeatureMOPS, FeatureNMI], !listconcat(HasV8_7aOps.DefaultExts, [FeatureMOPS, FeatureHBC])>; @@ -816,7 +824,7 @@ def HasV9_1aOps : Architecture64<9, 1, "a", "v9.1a", !listconcat(HasV9_0aOps.DefaultExts, [FeatureBF16, FeatureMatMulInt8, FeatureRME])>; def HasV9_2aOps : Architecture64<9, 2, "a", "v9.2a", [HasV8_7aOps, HasV9_1aOps], - !listconcat(HasV9_1aOps.DefaultExts, [FeatureMEC])>; + !listconcat(HasV9_1aOps.DefaultExts, [FeatureMEC, FeatureWFxT])>; def HasV9_3aOps : Architecture64<9, 3, "a", "v9.3a", [HasV8_8aOps, HasV9_2aOps], !listconcat(HasV9_2aOps.DefaultExts, [FeatureMOPS, FeatureHBC])>; @@ -833,7 +841,7 @@ def HasV8_0rOps : Architecture64<8, 0, "r", "v8r", //v8.2 FeatureRAS, FeaturePsUAO, FeatureCCPP, FeaturePAN_RWV, //v8.3 - FeatureCCIDX, FeaturePAuth, FeatureRCPC, + FeaturePAuth, FeatureRCPC, //v8.4 FeatureTRACEV8_4, FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO, @@ -844,7 +852,7 @@ def HasV8_0rOps : Architecture64<8, 0, "r", "v8r", // For v8-R, we do not enable crypto and align with GCC that enables a more // minimal set of optional architecture extensions. !listconcat( - !listremove(HasV8_5aOps.DefaultExts, [FeatureLSE]), + !listremove(HasV8_5aOps.DefaultExts, [FeatureBranchTargetId, FeaturePredRes]), [FeatureSSBS, FeatureFullFP16, FeatureFP16FML, FeatureSB] )>; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index bd530903bb66..ba46ededc63a 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -240,6 +240,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -275,6 +276,10 @@ cl::opt<bool> EnableHomogeneousPrologEpilog( // Stack hazard padding size. 0 = disabled. static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size", cl::init(0), cl::Hidden); +// Stack hazard size for analysis remarks. StackHazardSize takes precedence. +static cl::opt<unsigned> + StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0), + cl::Hidden); // Whether to insert padding into non-streaming functions (for testing). static cl::opt<bool> StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming", @@ -2615,9 +2620,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, const auto &MFI = MF.getFrameInfo(); int64_t ObjectOffset = MFI.getObjectOffset(FI); + StackOffset SVEStackSize = getSVEStackSize(MF); + + // For VLA-area objects, just emit an offset at the end of the stack frame. + // Whilst not quite correct, these objects do live at the end of the frame and + // so it is more useful for analysis for the offset to reflect this. + if (MFI.isVariableSizedObjectIndex(FI)) { + return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize; + } // This is correct in the absence of any SVE stack objects. - StackOffset SVEStackSize = getSVEStackSize(MF); if (!SVEStackSize) return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea()); @@ -3528,13 +3540,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( return true; } -// Return the FrameID for a Load/Store instruction by looking at the MMO. -static std::optional<int> getLdStFrameID(const MachineInstr &MI, - const MachineFrameInfo &MFI) { - if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1) - return std::nullopt; - - MachineMemOperand *MMO = *MI.memoperands_begin(); +// Return the FrameID for a MMO. +static std::optional<int> getMMOFrameID(MachineMemOperand *MMO, + const MachineFrameInfo &MFI) { auto *PSV = dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue()); if (PSV) @@ -3552,6 +3560,15 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI, return std::nullopt; } +// Return the FrameID for a Load/Store instruction by looking at the first MMO. +static std::optional<int> getLdStFrameID(const MachineInstr &MI, + const MachineFrameInfo &MFI) { + if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1) + return std::nullopt; + + return getMMOFrameID(*MI.memoperands_begin(), MFI); +} + // Check if a Hazard slot is needed for the current function, and if so create // one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex, // which can be used to determine if any hazard padding is needed. @@ -5029,3 +5046,174 @@ void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF, MI->eraseFromParent(); } } + +struct StackAccess { + enum AccessType { + NotAccessed = 0, // Stack object not accessed by load/store instructions. + GPR = 1 << 0, // A general purpose register. + PPR = 1 << 1, // A predicate register. + FPR = 1 << 2, // A floating point/Neon/SVE register. + }; + + int Idx; + StackOffset Offset; + int64_t Size; + unsigned AccessTypes; + + StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {} + + bool operator<(const StackAccess &Rhs) const { + return std::make_tuple(start(), Idx) < + std::make_tuple(Rhs.start(), Rhs.Idx); + } + + bool isCPU() const { + // Predicate register load and store instructions execute on the CPU. + return AccessTypes & (AccessType::GPR | AccessType::PPR); + } + bool isSME() const { return AccessTypes & AccessType::FPR; } + bool isMixed() const { return isCPU() && isSME(); } + + int64_t start() const { return Offset.getFixed() + Offset.getScalable(); } + int64_t end() const { return start() + Size; } + + std::string getTypeString() const { + switch (AccessTypes) { + case AccessType::FPR: + return "FPR"; + case AccessType::PPR: + return "PPR"; + case AccessType::GPR: + return "GPR"; + case AccessType::NotAccessed: + return "NA"; + default: + return "Mixed"; + } + } + + void print(raw_ostream &OS) const { + OS << getTypeString() << " stack object at [SP" + << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed(); + if (Offset.getScalable()) + OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable() + << " * vscale"; + OS << "]"; + } +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) { + SA.print(OS); + return OS; +} + +void AArch64FrameLowering::emitRemarks( + const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const { + + SMEAttrs Attrs(MF.getFunction()); + if (Attrs.hasNonStreamingInterfaceAndBody()) + return; + + const uint64_t HazardSize = + (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize; + + if (HazardSize == 0) + return; + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + // Bail if function has no stack objects. + if (!MFI.hasStackObjects()) + return; + + std::vector<StackAccess> StackAccesses(MFI.getNumObjects()); + + size_t NumFPLdSt = 0; + size_t NumNonFPLdSt = 0; + + // Collect stack accesses via Load/Store instructions. + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1) + continue; + for (MachineMemOperand *MMO : MI.memoperands()) { + std::optional<int> FI = getMMOFrameID(MMO, MFI); + if (FI && !MFI.isDeadObjectIndex(*FI)) { + int FrameIdx = *FI; + + size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects(); + if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) { + StackAccesses[ArrIdx].Idx = FrameIdx; + StackAccesses[ArrIdx].Offset = + getFrameIndexReferenceFromSP(MF, FrameIdx); + StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx); + } + + unsigned RegTy = StackAccess::AccessType::GPR; + if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) { + if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) + RegTy = StackAccess::PPR; + else + RegTy = StackAccess::FPR; + } else if (AArch64InstrInfo::isFpOrNEON(MI)) { + RegTy = StackAccess::FPR; + } + + StackAccesses[ArrIdx].AccessTypes |= RegTy; + + if (RegTy == StackAccess::FPR) + ++NumFPLdSt; + else + ++NumNonFPLdSt; + } + } + } + } + + if (NumFPLdSt == 0 || NumNonFPLdSt == 0) + return; + + llvm::sort(StackAccesses); + StackAccesses.erase(llvm::remove_if(StackAccesses, + [](const StackAccess &S) { + return S.AccessTypes == + StackAccess::NotAccessed; + }), + StackAccesses.end()); + + SmallVector<const StackAccess *> MixedObjects; + SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs; + + if (StackAccesses.front().isMixed()) + MixedObjects.push_back(&StackAccesses.front()); + + for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end()); + It != End; ++It) { + const auto &First = *It; + const auto &Second = *(It + 1); + + if (Second.isMixed()) + MixedObjects.push_back(&Second); + + if ((First.isSME() && Second.isCPU()) || + (First.isCPU() && Second.isSME())) { + uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end()); + if (Distance < HazardSize) + HazardPairs.emplace_back(&First, &Second); + } + } + + auto EmitRemark = [&](llvm::StringRef Str) { + ORE->emit([&]() { + auto R = MachineOptimizationRemarkAnalysis( + "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front()); + return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str; + }); + }; + + for (const auto &P : HazardPairs) + EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str()); + + for (const auto *Obj : MixedObjects) + EmitRemark( + formatv("{0} accessed by both GP and FP instructions", *Obj).str()); +} diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 0ebab1700e9c..c19731249620 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -13,8 +13,9 @@ #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H #define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H -#include "llvm/Support/TypeSize.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/Support/TypeSize.h" namespace llvm { @@ -178,6 +179,9 @@ private: inlineStackProbeLoopExactMultiple(MachineBasicBlock::iterator MBBI, int64_t NegProbeSize, Register TargetReg) const; + + void emitRemarks(const MachineFunction &MF, + MachineOptimizationRemarkEmitter *ORE) const override; }; } // End llvm namespace diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6d413a09407a..62078822c89b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17719,6 +17719,9 @@ static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N, // and generate vecreduce.add(concat_vector(DOT, DOT2, ..)). static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *ST) { + if (!ST->isNeonAvailable()) + return SDValue(); + if (!ST->hasDotProd()) return performVecReduceAddCombineWithUADDLP(N, DAG); diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td index 71384a23c49a..6df87fc6a815 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td @@ -688,6 +688,7 @@ def ProcessorFeatures { FeatureMatMulInt8, FeatureBF16, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, FeatureFP16FML, + FeatureCCIDX, FeatureSB, FeaturePAuth, FeatureSSBS, FeatureSVE, FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8,FeatureFullFP16, FeatureJS, FeatureLSE, @@ -695,6 +696,7 @@ def ProcessorFeatures { list<SubtargetFeature> A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, FeatureFP16FML, + FeatureCCIDX, FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes, FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS, @@ -703,6 +705,7 @@ def ProcessorFeatures { list<SubtargetFeature> A520AE = [HasV9_2aOps, FeaturePerfMon, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, FeatureFP16FML, + FeatureCCIDX, FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes, FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS, @@ -734,12 +737,14 @@ def ProcessorFeatures { FeaturePerfMon, FeatureRCPC, FeatureSPE, FeatureSSBS, FeatureCRC, FeatureLSE, FeatureRAS, FeatureRDM]; list<SubtargetFeature> A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, + FeatureCCIDX, FeatureSSBS, FeatureETE, FeatureMTE, FeatureFP16FML, FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8, FeaturePAuth, FeatureFlagM, FeatureSB, FeatureSVE, FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureFullFP16, FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> A715 = [HasV9_0aOps, FeatureNEON, FeatureMTE, + FeatureCCIDX, FeatureFP16FML, FeatureSVE, FeatureTRBE, FeatureSVE2BitPerm, FeatureBF16, FeatureETE, FeaturePerfMon, FeatureMatMulInt8, FeatureSPE, @@ -749,6 +754,7 @@ def ProcessorFeatures { FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> A720 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, + FeatureCCIDX, FeatureTRBE, FeatureSVE2BitPerm, FeatureETE, FeaturePerfMon, FeatureSPE, FeatureSPE_EEF, FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes, @@ -757,6 +763,7 @@ def ProcessorFeatures { FeatureJS, FeatureLSE, FeatureNEON, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> A720AE = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, + FeatureCCIDX, FeatureTRBE, FeatureSVE2BitPerm, FeatureETE, FeaturePerfMon, FeatureSPE, FeatureSPE_EEF, FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes, @@ -765,6 +772,7 @@ def ProcessorFeatures { FeatureJS, FeatureLSE, FeatureNEON, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> A725 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, + FeatureCCIDX, FeatureETE, FeaturePerfMon, FeatureSPE, FeatureSVE2BitPerm, FeatureSPE_EEF, FeatureTRBE, FeatureFlagM, FeaturePredRes, FeatureSB, FeatureSSBS, @@ -800,6 +808,7 @@ def ProcessorFeatures { FeatureMatMulInt8, FeatureBF16, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, FeatureFP16FML, + FeatureCCIDX, FeaturePAuth, FeatureSSBS, FeatureSB, FeatureSVE, FeatureSVE2, FeatureFlagM, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureFullFP16, FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM]; @@ -808,6 +817,7 @@ def ProcessorFeatures { FeatureSPE, FeatureBF16, FeatureMatMulInt8, FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16, FeatureFP16FML, + FeatureCCIDX, FeatureSB, FeaturePAuth, FeaturePredRes, FeatureFlagM, FeatureSSBS, FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureDotProd]; @@ -815,11 +825,13 @@ def ProcessorFeatures { FeaturePerfMon, FeatureETE, FeatureTRBE, FeatureSPE, FeatureMTE, FeatureSVE2BitPerm, FeatureFP16FML, FeatureSPE_EEF, + FeatureCCIDX, FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes, FeatureSVE, FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS, FeatureLSE, FeatureNEON, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureBF16]; list<SubtargetFeature> X925 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, + FeatureCCIDX, FeatureETE, FeaturePerfMon, FeatureSPE, FeatureSVE2BitPerm, FeatureSPE_EEF, FeatureTRBE, FeatureFlagM, FeaturePredRes, FeatureSB, FeatureSSBS, @@ -863,23 +875,26 @@ def ProcessorFeatures { list<SubtargetFeature> AppleA15 = [HasV8_6aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureSHA3, FeatureFullFP16, FeatureFP16FML, - FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE, - FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, - FeatureBF16, FeatureDotProd, FeatureMatMulInt8]; + FeatureComplxNum, FeatureCRC, FeatureJS, + FeatureLSE, FeaturePAuth, + FeatureRAS, FeatureRCPC, FeatureRDM, + FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS]; list<SubtargetFeature> AppleA16 = [HasV8_6aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureSHA3, FeatureFullFP16, FeatureFP16FML, FeatureHCX, - FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE, - FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, - FeatureBF16, FeatureDotProd, FeatureMatMulInt8]; + FeatureComplxNum, FeatureCRC, FeatureJS, + FeatureLSE, FeaturePAuth, + FeatureRAS, FeatureRCPC, FeatureRDM, + FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS]; list<SubtargetFeature> AppleA17 = [HasV8_6aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureSHA3, FeatureFullFP16, FeatureFP16FML, FeatureHCX, - FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE, - FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM, - FeatureBF16, FeatureDotProd, FeatureMatMulInt8]; + FeatureComplxNum, FeatureCRC, FeatureJS, + FeatureLSE, FeaturePAuth, + FeatureRAS, FeatureRCPC, FeatureRDM, + FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS]; list<SubtargetFeature> AppleM4 = [HasV9_2aOps, FeatureSHA2, FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureSHA3, FeatureFullFP16, FeatureFP16FML, @@ -909,6 +924,7 @@ def ProcessorFeatures { FeatureMatMulInt8, FeatureMTE, FeatureSVE2, FeatureSVE2BitPerm, FeatureTRBE, FeaturePerfMon, + FeatureCCIDX, FeatureDotProd, FeatureFullFP16, FeatureSB, FeatureSSBS, FeatureSVE, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS, FeatureLSE, FeatureNEON, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM]; @@ -916,6 +932,7 @@ def ProcessorFeatures { FeatureFullFP16, FeatureMTE, FeaturePerfMon, FeatureRandGen, FeatureSPE, FeatureSPE_EEF, FeatureSVE2BitPerm, + FeatureCCIDX, FeatureSSBS, FeatureSB, FeaturePredRes, FeaturePAuth, FeatureFlagM, FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, @@ -926,6 +943,7 @@ def ProcessorFeatures { FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE, FeatureSSBS, FeatureSVE, + FeatureCCIDX, FeatureSHA3, FeatureSM4, FeatureDotProd, FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM]; @@ -934,6 +952,7 @@ def ProcessorFeatures { FeatureFullFP16, FeatureMatMulInt8, FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE, FeatureSSBS, FeatureSVE, + FeatureCCIDX, FeatureSHA3, FeatureSM4, FeatureDotProd, FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM]; @@ -941,12 +960,14 @@ def ProcessorFeatures { FeaturePerfMon, FeatureETE, FeatureMatMulInt8, FeatureNEON, FeatureSVE2BitPerm, FeatureFP16FML, FeatureMTE, FeatureRandGen, + FeatureCCIDX, FeatureSVE, FeatureSVE2, FeatureSSBS, FeatureFullFP16, FeatureDotProd, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> NeoverseV3 = [HasV9_2aOps, FeatureETE, FeatureFP16FML, FeatureFullFP16, FeatureLS64, FeatureMTE, FeaturePerfMon, FeatureRandGen, FeatureSPE, + FeatureCCIDX, FeatureSPE_EEF, FeatureSVE2BitPerm, FeatureBRBE, FeatureSSBS, FeatureSB, FeaturePredRes, FeaturePAuth, FeatureFlagM, FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC, @@ -957,12 +978,14 @@ def ProcessorFeatures { FeaturePerfMon, FeatureRandGen, FeatureSPE, FeatureSPE_EEF, FeatureSVE2BitPerm, FeatureBRBE, FeatureSSBS, FeatureSB, FeaturePredRes, FeaturePAuth, FeatureFlagM, + FeatureCCIDX, FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, FeatureJS, FeatureLSE, FeatureNEON, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureRME]; list<SubtargetFeature> Saphira = [HasV8_4aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8, FeatureNEON, FeatureSPE, FeaturePerfMon, FeatureCRC, + FeatureCCIDX, FeatureLSE, FeatureRDM, FeatureRAS, FeatureRCPC]; list<SubtargetFeature> ThunderX = [HasV8_0aOps, FeatureCRC, FeatureSHA2, FeatureAES, FeatureFPARMv8, FeaturePerfMon, FeatureNEON]; @@ -971,6 +994,7 @@ def ProcessorFeatures { FeatureRDM]; list<SubtargetFeature> ThunderX3T110 = [HasV8_3aOps, FeatureCRC, FeatureSHA2, FeatureAES, FeatureFPARMv8, FeatureNEON, FeatureLSE, + FeatureCCIDX, FeaturePAuth, FeaturePerfMon, FeatureComplxNum, FeatureJS, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8, @@ -983,6 +1007,7 @@ def ProcessorFeatures { FeatureSHA2, FeatureSHA3, FeatureAES, FeatureFullFP16, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, FeatureJS, + FeatureCCIDX, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM]; list<SubtargetFeature> Ampere1A = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, FeatureMTE, FeatureSSBS, FeatureRandGen, @@ -991,6 +1016,7 @@ def ProcessorFeatures { FeatureFullFP16, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, + FeatureCCIDX, FeatureRDM]; list<SubtargetFeature> Ampere1B = [HasV8_7aOps, FeatureNEON, FeaturePerfMon, FeatureMTE, FeatureSSBS, FeatureRandGen, @@ -999,6 +1025,7 @@ def ProcessorFeatures { FeatureWFxT, FeatureFullFP16, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, + FeatureCCIDX, FeatureRDM]; list<SubtargetFeature> Oryon = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, @@ -1007,6 +1034,7 @@ def ProcessorFeatures { FeatureSHA3, FeatureAES, FeatureSPE, FeatureBF16, FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, + FeatureSSBS, FeatureCCIDX, FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM]; // ETE and TRBE are future architecture extensions. We temporarily enable them diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 45148449dfb8..39fba6a257bb 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -254,7 +254,8 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, return false; if (CallerAttrs.requiresLazySave(CalleeAttrs) || - CallerAttrs.requiresSMChange(CalleeAttrs)) { + CallerAttrs.requiresSMChange(CalleeAttrs) || + CallerAttrs.requiresPreservingZT0(CalleeAttrs)) { if (hasPossibleIncompatibleOps(Callee)) return false; } @@ -540,7 +541,15 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { InstructionCost AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. auto *RetTy = ICA.getReturnType(); + if (auto *VTy = dyn_cast<ScalableVectorType>(RetTy)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + switch (ICA.getID()) { case Intrinsic::experimental_vector_histogram_add: if (!ST->hasSVE2()) @@ -2295,6 +2304,11 @@ std::optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic( return std::nullopt; } +bool AArch64TTIImpl::enableScalableVectorization() const { + return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() && + EnableScalableAutovecInStreamingMode); +} + TypeSize AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { switch (K) { @@ -3018,6 +3032,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( ArrayRef<const Value *> Args, const Instruction *CxtI) { + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast<ScalableVectorType>(Ty)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + // TODO: Handle more cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, @@ -3792,6 +3814,14 @@ InstructionCost AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast<ScalableVectorType>(Ty)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty); if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16()) @@ -3836,6 +3866,14 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast<ScalableVectorType>(ValTy)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + if (TTI::requiresOrderedReduction(FMF)) { if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) { InstructionCost BaseCost = diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index a9189fd53f40..4a6457d7a7db 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -381,7 +381,7 @@ public: return ST->isSVEorStreamingSVEAvailable(); } - bool enableScalableVectorization() const { return ST->isSVEAvailable(); } + bool enableScalableVectorization() const; bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 9b2cab2eb73a..32ecf350db59 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1581,7 +1581,18 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) { // Clamp is applied after omod, so it is OK if omod is set. DefClamp->setImm(1); - MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg()); + + Register DefReg = Def->getOperand(0).getReg(); + Register MIDstReg = MI.getOperand(0).getReg(); + if (TRI->isSGPRReg(*MRI, DefReg)) { + // Pseudo scalar instructions have a SGPR for dst and clamp is a v_max* + // instruction with a VGPR dst. + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), + MIDstReg) + .addReg(DefReg); + } else { + MRI->replaceRegWith(MIDstReg, DefReg); + } MI.eraseFromParent(); // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp index 9cc162d041f4..883808ae981f 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -50,6 +50,13 @@ using namespace llvm; +// Whether Big-endian GISel is enabled, defaults to off, can be enabled for +// testing. +static cl::opt<bool> + EnableGISelBigEndian("enable-arm-gisel-bigendian", cl::Hidden, + cl::init(false), + cl::desc("Enable Global-ISel Big Endian Lowering")); + ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI) : CallLowering(&TLI) {} @@ -539,3 +546,5 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo & return true; } + +bool ARMCallLowering::enableBigEndian() const { return EnableGISelBigEndian; }
\ No newline at end of file diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h index 38095617fb4f..32c95a044d7b 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h @@ -42,6 +42,8 @@ public: bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override; + bool enableBigEndian() const override; + private: bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef<Register> VRegs, diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp index f0933765bbcb..86ce6b4e05ed 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -1223,6 +1223,10 @@ void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) { if (ER.Kind == MachineOperand::MO_GlobalAddress) if (ER.V.GV->getName().empty()) return; + // Ignore block address that points to block in another function + if (ER.Kind == MachineOperand::MO_BlockAddress) + if (ER.V.BA->getFunction() != &(MI.getMF()->getFunction())) + return; Extenders.push_back(ED); } diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp index bd8ef43da625..64a0e9321598 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp @@ -1608,8 +1608,8 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { } emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8); emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8); - emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]); - emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF); + emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[1]).addImm(0xFF); + emitInst(Mips::OR, DestReg).addReg(TempReg[0]).addReg(TempReg[2]); updateValueMap(II, DestReg); return true; } diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 1963582ce686..a57ed33bda9c 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1007,7 +1007,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // R0 cannot be used as a base register, but it can be used as an // index in a store-indexed. int LastOffset = 0; - if (HasFP) { + if (HasFP) { // R0 += (FPOffset-LastOffset). // Need addic, since addi treats R0 as 0. BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) @@ -2025,8 +2025,18 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, // code. Same goes for the base pointer and the PIC base register. if (needsFP(MF)) SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); - if (RegInfo->hasBasePointer(MF)) + if (RegInfo->hasBasePointer(MF)) { SavedRegs.reset(RegInfo->getBaseRegister(MF)); + // On AIX, when BaseRegister(R30) is used, need to spill r31 too to match + // AIX trackback table requirement. + if (!needsFP(MF) && !SavedRegs.test(isPPC64 ? PPC::X31 : PPC::R31) && + Subtarget.isAIXABI()) { + assert( + (RegInfo->getBaseRegister(MF) == (isPPC64 ? PPC::X30 : PPC::R30)) && + "Invalid base register on AIX!"); + SavedRegs.set(isPPC64 ? PPC::X31 : PPC::R31); + } + } if (FI->usesPICBase()) SavedRegs.reset(PPC::R30); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp index 0a66a38f6d5a..be2e880ecd3a 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp @@ -187,25 +187,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) { auto *VTy = cast<VectorType>(II.getType()); IRBuilder<> Builder(&II); - - // Extend VL from i32 to XLen if needed. - if (ST->is64Bit()) - VL = Builder.CreateZExt(VL, Builder.getInt64Ty()); - Type *STy = VTy->getElementType(); Value *Val = Builder.CreateLoad(STy, BasePtr); - const auto &TLI = *ST->getTargetLowering(); - Value *Res; - - // TODO: Also support fixed/illegal vector types to splat with evl = vl. - if (isa<ScalableVectorType>(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) { - unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f - : Intrinsic::riscv_vmv_v_x; - Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()}, - {PoisonValue::get(VTy), Val, VL}); - } else { - Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val); - } + Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy}, + {Val, II.getOperand(2), VL}); II.replaceAllUsesWith(Res); II.eraseFromParent(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 809be499ee0f..9d2990c98ce2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -171,7 +171,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); // If the memcpy has metadata describing the members, see if we can get the - // TBAA tag describing our copy. + // TBAA, scope and noalias tags describing our copy. AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size); Value *Src = MI->getArgOperand(1); |
