aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2024-08-25 11:12:58 +0000
committerDimitry Andric <dim@FreeBSD.org>2024-10-23 18:27:07 +0000
commit62987288060ff68c817b7056815aa9fb8ba8ecd7 (patch)
treed7953c9488f71e79d26c924169988f16e9ba9cee /contrib/llvm-project/llvm/lib
parent52418fc2be8efa5172b90a3a9e617017173612c4 (diff)
parentadf62863f35c84e4c5708f3dc5a1589ce958a238 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib')
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp121
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGenData/CodeGenData.cpp196
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataReader.cpp175
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataWriter.cpp162
-rw-r--r--contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Support/regcomp.c8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Features.td28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp204
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp40
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp2
27 files changed, 427 insertions, 667 deletions
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 84214c47a10e..f3fc69c86cd1 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -728,11 +728,6 @@ public:
MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
- const DenseMap<Value *, SmallVector<const Value *, 16>> &
- getUnderlyingObjects() {
- return UnderlyingObjects;
- }
-
private:
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
@@ -1459,22 +1454,23 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
}
/// Check whether the access through \p Ptr has a constant stride.
-std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
- Type *AccessTy, Value *Ptr,
- const Loop *Lp,
- const DenseMap<Value *, const SCEV *> &StridesMap,
- bool Assume, bool ShouldCheckWrap) {
+std::optional<int64_t>
+llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
+ const Loop *Lp,
+ const DenseMap<Value *, const SCEV *> &StridesMap,
+ bool Assume, bool ShouldCheckWrap) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
+ if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
+ return {0};
+
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
-
if (isa<ScalableVectorType>(AccessTy)) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
<< "\n");
return std::nullopt;
}
- const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
-
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (Assume && !AR)
AR = PSE.getAsAddRec(Ptr);
@@ -1899,24 +1895,12 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
return ScaledDist % Stride;
}
-/// Returns true if any of the underlying objects has a loop varying address,
-/// i.e. may change in \p L.
-static bool
-isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
- ScalarEvolution &SE, const Loop *L) {
- return any_of(UnderlyingObjects, [&SE, L](const Value *UO) {
- return !SE.isLoopInvariant(SE.getSCEV(const_cast<Value *>(UO)), L);
- });
-}
-
std::variant<MemoryDepChecker::Dependence::DepType,
MemoryDepChecker::DepDistanceStrideAndSizeInfo>
MemoryDepChecker::getDependenceDistanceStrideAndSize(
const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
- const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
- const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects) {
- auto &DL = InnermostLoop->getHeader()->getDataLayout();
+ const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
+ const auto &DL = InnermostLoop->getHeader()->getDataLayout();
auto &SE = *PSE.getSE();
auto [APtr, AIsWrite] = A;
auto [BPtr, BIsWrite] = B;
@@ -1933,12 +1917,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
BPtr->getType()->getPointerAddressSpace())
return MemoryDepChecker::Dependence::Unknown;
- int64_t StrideAPtr =
- getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true)
- .value_or(0);
- int64_t StrideBPtr =
- getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true)
- .value_or(0);
+ std::optional<int64_t> StrideAPtr =
+ getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true);
+ std::optional<int64_t> StrideBPtr =
+ getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true);
const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
@@ -1946,26 +1928,19 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
// If the induction step is negative we have to invert source and sink of the
// dependence when measuring the distance between them. We should not swap
// AIsWrite with BIsWrite, as their uses expect them in program order.
- if (StrideAPtr < 0) {
+ if (StrideAPtr && *StrideAPtr < 0) {
std::swap(Src, Sink);
std::swap(AInst, BInst);
+ std::swap(StrideAPtr, StrideBPtr);
}
const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
- << "(Induction step: " << StrideAPtr << ")\n");
+ << "\n");
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
<< ": " << *Dist << "\n");
- // Needs accesses where the addresses of the accessed underlying objects do
- // not change within the loop.
- if (isLoopVariantIndirectAddress(UnderlyingObjects.find(APtr)->second, SE,
- InnermostLoop) ||
- isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE,
- InnermostLoop))
- return MemoryDepChecker::Dependence::IndirectUnsafe;
-
// Check if we can prove that Sink only accesses memory after Src's end or
// vice versa. At the moment this is limited to cases where either source or
// sink are loop invariant to avoid compile-time increases. This is not
@@ -1987,12 +1962,33 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
}
}
- // Need accesses with constant strides and the same direction. We don't want
- // to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that
- // could wrap in the address space.
- if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) ||
- (StrideAPtr < 0 && StrideBPtr > 0)) {
+ // Need accesses with constant strides and the same direction for further
+ // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
+ // similar code or pointer arithmetic that could wrap in the address space.
+
+ // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
+ // not loop-invariant (stride will be 0 in that case), we cannot analyze the
+ // dependence further and also cannot generate runtime checks.
+ if (!StrideAPtr || !StrideBPtr) {
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
+ return MemoryDepChecker::Dependence::IndirectUnsafe;
+ }
+
+ int64_t StrideAPtrInt = *StrideAPtr;
+ int64_t StrideBPtrInt = *StrideBPtr;
+ LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
+ << " Sink induction step: " << StrideBPtrInt << "\n");
+ // At least Src or Sink are loop invariant and the other is strided or
+ // invariant. We can generate a runtime check to disambiguate the accesses.
+ if (StrideAPtrInt == 0 || StrideBPtrInt == 0)
+ return MemoryDepChecker::Dependence::Unknown;
+
+ // Both Src and Sink have a constant stride, check if they are in the same
+ // direction.
+ if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) ||
+ (StrideAPtrInt < 0 && StrideBPtrInt > 0)) {
+ LLVM_DEBUG(
+ dbgs() << "Pointer access with strides in different directions\n");
return MemoryDepChecker::Dependence::Unknown;
}
@@ -2001,22 +1997,20 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
if (!HasSameSize)
TypeByteSize = 0;
- return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr),
- std::abs(StrideBPtr), TypeByteSize,
+ return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt),
+ std::abs(StrideBPtrInt), TypeByteSize,
AIsWrite, BIsWrite);
}
-MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
- const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
- unsigned BIdx,
- const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects) {
+MemoryDepChecker::Dependence::DepType
+MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
+ const MemAccessInfo &B, unsigned BIdx) {
assert(AIdx < BIdx && "Must pass arguments in program order");
// Get the dependence distance, stride, type size and what access writes for
// the dependence between A and B.
- auto Res = getDependenceDistanceStrideAndSize(
- A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects);
+ auto Res =
+ getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
if (std::holds_alternative<Dependence::DepType>(Res))
return std::get<Dependence::DepType>(Res);
@@ -2250,10 +2244,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
return Dependence::BackwardVectorizable;
}
-bool MemoryDepChecker::areDepsSafe(
- DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
- const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects) {
+bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets,
+ const MemAccessInfoList &CheckDeps) {
MinDepDistBytes = -1;
SmallPtrSet<MemAccessInfo, 8> Visited;
@@ -2296,8 +2288,8 @@ bool MemoryDepChecker::areDepsSafe(
if (*I1 > *I2)
std::swap(A, B);
- Dependence::DepType Type = isDependent(*A.first, A.second, *B.first,
- B.second, UnderlyingObjects);
+ Dependence::DepType Type =
+ isDependent(*A.first, A.second, *B.first, B.second);
mergeInStatus(Dependence::isSafeForVectorization(Type));
// Gather dependences unless we accumulated MaxDependences
@@ -2652,8 +2644,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (Accesses.isDependencyCheckNeeded()) {
LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses,
- Accesses.getDependenciesToCheck(),
- Accesses.getUnderlyingObjects());
+ Accesses.getDependenciesToCheck());
if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index e1cb63a9ab8f..0d7eb7da8d6b 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -822,16 +822,16 @@ MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) {
AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) {
AAMDNodes New = *this;
MDNode *M = New.TBAAStruct;
- if (M && M->getNumOperands() >= 3 && M->getOperand(0) &&
+ if (!New.TBAA && M && M->getNumOperands() >= 3 && M->getOperand(0) &&
mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
M->getOperand(1) && mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
AccessSize &&
- M->getOperand(2) && isa<MDNode>(M->getOperand(2))) {
- New.TBAAStruct = nullptr;
+ M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
New.TBAA = cast<MDNode>(M->getOperand(2));
- }
+
+ New.TBAAStruct = nullptr;
return New;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 68a8a273a1b4..eb010afd41b6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3889,6 +3889,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
F.getSubprogram(), &F.getEntryBlock());
R << "unable to translate in big endian mode";
reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
}
// Release the per-function state when we return, whether we succeeded or not.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index cd5d877e53d8..f4490873cfdc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -341,6 +341,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
<< ore::NV("Function", MF.getFunction().getName()) << "'";
});
+ // Emit any remarks implemented for the target, based on final frame layout.
+ TFI->emitRemarks(MF, ORE);
+
delete RS;
SaveBlocks.clear();
RestoreBlocks.clear();
diff --git a/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenData.cpp b/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenData.cpp
deleted file mode 100644
index 49b744744095..000000000000
--- a/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenData.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-//===-- CodeGenData.cpp ---------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for codegen data that has stable summary which
-// can be used to optimize the code in the subsequent codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/CodeGenData/CodeGenDataReader.h"
-#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/WithColor.h"
-
-#define DEBUG_TYPE "cg-data"
-
-using namespace llvm;
-using namespace cgdata;
-
-static std::string getCGDataErrString(cgdata_error Err,
- const std::string &ErrMsg = "") {
- std::string Msg;
- raw_string_ostream OS(Msg);
-
- switch (Err) {
- case cgdata_error::success:
- OS << "success";
- break;
- case cgdata_error::eof:
- OS << "end of File";
- break;
- case cgdata_error::bad_magic:
- OS << "invalid codegen data (bad magic)";
- break;
- case cgdata_error::bad_header:
- OS << "invalid codegen data (file header is corrupt)";
- break;
- case cgdata_error::empty_cgdata:
- OS << "empty codegen data";
- break;
- case cgdata_error::malformed:
- OS << "malformed codegen data";
- break;
- case cgdata_error::unsupported_version:
- OS << "unsupported codegen data version";
- break;
- }
-
- // If optional error message is not empty, append it to the message.
- if (!ErrMsg.empty())
- OS << ": " << ErrMsg;
-
- return OS.str();
-}
-
-namespace {
-
-// FIXME: This class is only here to support the transition to llvm::Error. It
-// will be removed once this transition is complete. Clients should prefer to
-// deal with the Error value directly, rather than converting to error_code.
-class CGDataErrorCategoryType : public std::error_category {
- const char *name() const noexcept override { return "llvm.cgdata"; }
-
- std::string message(int IE) const override {
- return getCGDataErrString(static_cast<cgdata_error>(IE));
- }
-};
-
-} // end anonymous namespace
-
-const std::error_category &llvm::cgdata_category() {
- static CGDataErrorCategoryType ErrorCategory;
- return ErrorCategory;
-}
-
-std::string CGDataError::message() const {
- return getCGDataErrString(Err, Msg);
-}
-
-char CGDataError::ID = 0;
-
-namespace {
-
-const char *CodeGenDataSectNameCommon[] = {
-#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
- SectNameCommon,
-#include "llvm/CodeGenData/CodeGenData.inc"
-};
-
-const char *CodeGenDataSectNameCoff[] = {
-#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
- SectNameCoff,
-#include "llvm/CodeGenData/CodeGenData.inc"
-};
-
-const char *CodeGenDataSectNamePrefix[] = {
-#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix,
-#include "llvm/CodeGenData/CodeGenData.inc"
-};
-
-} // namespace
-
-namespace llvm {
-
-std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
- Triple::ObjectFormatType OF,
- bool AddSegmentInfo) {
- std::string SectName;
-
- if (OF == Triple::MachO && AddSegmentInfo)
- SectName = CodeGenDataSectNamePrefix[CGSK];
-
- if (OF == Triple::COFF)
- SectName += CodeGenDataSectNameCoff[CGSK];
- else
- SectName += CodeGenDataSectNameCommon[CGSK];
-
- return SectName;
-}
-
-std::unique_ptr<CodeGenData> CodeGenData::Instance = nullptr;
-std::once_flag CodeGenData::OnceFlag;
-
-CodeGenData &CodeGenData::getInstance() {
- std::call_once(CodeGenData::OnceFlag, []() {
- Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
-
- // TODO: Initialize writer or reader mode for the client optimization.
- });
- return *(Instance.get());
-}
-
-namespace IndexedCGData {
-
-Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
- using namespace support;
-
- static_assert(std::is_standard_layout_v<llvm::IndexedCGData::Header>,
- "The header should be standard layout type since we use offset "
- "of fields to read.");
- Header H;
- H.Magic = endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
- if (H.Magic != IndexedCGData::Magic)
- return make_error<CGDataError>(cgdata_error::bad_magic);
- H.Version = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
- if (H.Version > IndexedCGData::CGDataVersion::CurrentVersion)
- return make_error<CGDataError>(cgdata_error::unsupported_version);
- H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
-
- switch (H.Version) {
- // When a new field is added to the header add a case statement here to
- // compute the size as offset of the new field + size of the new field. This
- // relies on the field being added to the end of the list.
- static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version1,
- "Please update the size computation below if a new field has "
- "been added to the header, if not add a case statement to "
- "fall through to the latest version.");
- case 1ull:
- H.OutlinedHashTreeOffset =
- endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
- }
-
- return H;
-}
-
-} // end namespace IndexedCGData
-
-namespace cgdata {
-
-void warn(Twine Message, std::string Whence, std::string Hint) {
- WithColor::warning();
- if (!Whence.empty())
- errs() << Whence << ": ";
- errs() << Message << "\n";
- if (!Hint.empty())
- WithColor::note() << Hint << "\n";
-}
-
-void warn(Error E, StringRef Whence) {
- if (E.isA<CGDataError>()) {
- handleAllErrors(std::move(E), [&](const CGDataError &IPE) {
- warn(IPE.message(), Whence.str(), "");
- });
- }
-}
-
-} // end namespace cgdata
-
-} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataReader.cpp b/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataReader.cpp
deleted file mode 100644
index bcd61047079f..000000000000
--- a/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataReader.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-//===- CodeGenDataReader.cpp ----------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for reading codegen data.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGenData/CodeGenDataReader.h"
-#include "llvm/CodeGenData/OutlinedHashTreeRecord.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/MemoryBuffer.h"
-
-#define DEBUG_TYPE "cg-data-reader"
-
-using namespace llvm;
-
-namespace llvm {
-
-static Expected<std::unique_ptr<MemoryBuffer>>
-setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
- auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
- : FS.getBufferForFile(Filename);
- if (std::error_code EC = BufferOrErr.getError())
- return errorCodeToError(EC);
- return std::move(BufferOrErr.get());
-}
-
-Error CodeGenDataReader::mergeFromObjectFile(
- const object::ObjectFile *Obj,
- OutlinedHashTreeRecord &GlobalOutlineRecord) {
- Triple TT = Obj->makeTriple();
- auto CGOutLineName =
- getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
-
- for (auto &Section : Obj->sections()) {
- Expected<StringRef> NameOrErr = Section.getName();
- if (!NameOrErr)
- return NameOrErr.takeError();
- Expected<StringRef> ContentsOrErr = Section.getContents();
- if (!ContentsOrErr)
- return ContentsOrErr.takeError();
- auto *Data = reinterpret_cast<const unsigned char *>(ContentsOrErr->data());
- auto *EndData = Data + ContentsOrErr->size();
-
- if (*NameOrErr == CGOutLineName) {
- // In case dealing with an executable that has concatenated cgdata,
- // we want to merge them into a single cgdata.
- // Although it's not a typical workflow, we support this scenario.
- while (Data != EndData) {
- OutlinedHashTreeRecord LocalOutlineRecord;
- LocalOutlineRecord.deserialize(Data);
- GlobalOutlineRecord.merge(LocalOutlineRecord);
- }
- }
- // TODO: Add support for other cgdata sections.
- }
-
- return Error::success();
-}
-
-Error IndexedCodeGenDataReader::read() {
- using namespace support;
-
- // The smallest header with the version 1 is 24 bytes
- const unsigned MinHeaderSize = 24;
- if (DataBuffer->getBufferSize() < MinHeaderSize)
- return error(cgdata_error::bad_header);
-
- auto *Start =
- reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
- auto *End =
- reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
- if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header))
- return E;
-
- if (hasOutlinedHashTree()) {
- const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
- if (Ptr >= End)
- return error(cgdata_error::eof);
- HashTreeRecord.deserialize(Ptr);
- }
-
- return success();
-}
-
-Expected<std::unique_ptr<CodeGenDataReader>>
-CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
- // Set up the buffer to read.
- auto BufferOrError = setupMemoryBuffer(Path, FS);
- if (Error E = BufferOrError.takeError())
- return std::move(E);
- return CodeGenDataReader::create(std::move(BufferOrError.get()));
-}
-
-Expected<std::unique_ptr<CodeGenDataReader>>
-CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
- if (Buffer->getBufferSize() == 0)
- return make_error<CGDataError>(cgdata_error::empty_cgdata);
-
- std::unique_ptr<CodeGenDataReader> Reader;
- // Create the reader.
- if (IndexedCodeGenDataReader::hasFormat(*Buffer))
- Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
- else if (TextCodeGenDataReader::hasFormat(*Buffer))
- Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
- else
- return make_error<CGDataError>(cgdata_error::malformed);
-
- // Initialize the reader and return the result.
- if (Error E = Reader->read())
- return std::move(E);
-
- return std::move(Reader);
-}
-
-bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) {
- using namespace support;
- if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
- return false;
-
- uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
- DataBuffer.getBufferStart());
- // Verify that it's magical.
- return Magic == IndexedCGData::Magic;
-}
-
-bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) {
- // Verify that this really looks like plain ASCII text by checking a
- // 'reasonable' number of characters (up to the magic size).
- StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t));
- return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); });
-}
-Error TextCodeGenDataReader::read() {
- using namespace support;
-
- // Parse the custom header line by line.
- for (; !Line.is_at_eof(); ++Line) {
- // Skip empty or whitespace-only lines
- if (Line->trim().empty())
- continue;
-
- if (!Line->starts_with(":"))
- break;
- StringRef Str = Line->drop_front().rtrim();
- if (Str.equals_insensitive("outlined_hash_tree"))
- DataKind |= CGDataKind::FunctionOutlinedHashTree;
- else
- return error(cgdata_error::bad_header);
- }
-
- // We treat an empty header (that is a comment # only) as a valid header.
- if (Line.is_at_eof()) {
- if (DataKind == CGDataKind::Unknown)
- return Error::success();
- return error(cgdata_error::bad_header);
- }
-
- // The YAML docs follow after the header.
- const char *Pos = Line->data();
- size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
- reinterpret_cast<size_t>(Pos);
- yaml::Input YOS(StringRef(Pos, Size));
- if (hasOutlinedHashTree())
- HashTreeRecord.deserializeYAML(YOS);
-
- // TODO: Add more yaml cgdata in order
-
- return Error::success();
-}
-} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataWriter.cpp b/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataWriter.cpp
deleted file mode 100644
index 3c91a1b30345..000000000000
--- a/contrib/llvm-project/llvm/lib/CodeGenData/CodeGenDataWriter.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===- CodeGenDataWriter.cpp ----------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for writing codegen data.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGenData/CodeGenDataWriter.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/EndianStream.h"
-
-#define DEBUG_TYPE "cg-data-writer"
-
-using namespace llvm;
-
-namespace llvm {
-
-/// A struct to define how the data stream should be patched.
-struct CGDataPatchItem {
- uint64_t Pos; // Where to patch.
- uint64_t *D; // Pointer to an array of source data.
- int N; // Number of elements in \c D array.
-};
-
-// A wrapper class to abstract writer stream with support of bytes
-// back patching.
-class CGDataOStream {
-public:
- CGDataOStream(raw_fd_ostream &FD)
- : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}
- CGDataOStream(raw_string_ostream &STR)
- : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}
-
- uint64_t tell() { return OS.tell(); }
- void write(uint64_t V) { LE.write<uint64_t>(V); }
- void write32(uint32_t V) { LE.write<uint32_t>(V); }
- void write8(uint8_t V) { LE.write<uint8_t>(V); }
-
- // \c patch can only be called when all data is written and flushed.
- // For raw_string_ostream, the patch is done on the target string
- // directly and it won't be reflected in the stream's internal buffer.
- void patch(ArrayRef<CGDataPatchItem> P) {
- using namespace support;
-
- if (IsFDOStream) {
- raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
- const uint64_t LastPos = FDOStream.tell();
- for (const auto &K : P) {
- FDOStream.seek(K.Pos);
- for (int I = 0; I < K.N; I++)
- write(K.D[I]);
- }
- // Reset the stream to the last position after patching so that users
- // don't accidentally overwrite data. This makes it consistent with
- // the string stream below which replaces the data directly.
- FDOStream.seek(LastPos);
- } else {
- raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
- std::string &Data = SOStream.str(); // with flush
- for (const auto &K : P) {
- for (int I = 0; I < K.N; I++) {
- uint64_t Bytes =
- endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
- Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
- (const char *)&Bytes, sizeof(uint64_t));
- }
- }
- }
- }
-
- // If \c OS is an instance of \c raw_fd_ostream, this field will be
- // true. Otherwise, \c OS will be an raw_string_ostream.
- bool IsFDOStream;
- raw_ostream &OS;
- support::endian::Writer LE;
-};
-
-} // end namespace llvm
-
-void CodeGenDataWriter::addRecord(OutlinedHashTreeRecord &Record) {
- assert(Record.HashTree && "empty hash tree in the record");
- HashTreeRecord.HashTree = std::move(Record.HashTree);
-
- DataKind |= CGDataKind::FunctionOutlinedHashTree;
-}
-
-Error CodeGenDataWriter::write(raw_fd_ostream &OS) {
- CGDataOStream COS(OS);
- return writeImpl(COS);
-}
-
-Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {
- using namespace support;
- IndexedCGData::Header Header;
- Header.Magic = IndexedCGData::Magic;
- Header.Version = IndexedCGData::Version;
-
- // Set the CGDataKind depending on the kind.
- Header.DataKind = 0;
- if (static_cast<bool>(DataKind & CGDataKind::FunctionOutlinedHashTree))
- Header.DataKind |=
- static_cast<uint32_t>(CGDataKind::FunctionOutlinedHashTree);
-
- Header.OutlinedHashTreeOffset = 0;
-
- // Only write up to the CGDataKind. We need to remember the offset of the
- // remaining fields to allow back-patching later.
- COS.write(Header.Magic);
- COS.write32(Header.Version);
- COS.write32(Header.DataKind);
-
- // Save the location of Header.OutlinedHashTreeOffset field in \c COS.
- OutlinedHashTreeOffset = COS.tell();
-
- // Reserve the space for OutlinedHashTreeOffset field.
- COS.write(0);
-
- return Error::success();
-}
-
-Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) {
- if (Error E = writeHeader(COS))
- return E;
-
- uint64_t OutlinedHashTreeFieldStart = COS.tell();
- if (hasOutlinedHashTree())
- HashTreeRecord.serialize(COS.OS);
-
- // Back patch the offsets.
- CGDataPatchItem PatchItems[] = {
- {OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1}};
- COS.patch(PatchItems);
-
- return Error::success();
-}
-
-Error CodeGenDataWriter::writeHeaderText(raw_fd_ostream &OS) {
- if (hasOutlinedHashTree())
- OS << "# Outlined stable hash tree\n:outlined_hash_tree\n";
-
- // TODO: Add more data types in this header
-
- return Error::success();
-}
-
-Error CodeGenDataWriter::writeText(raw_fd_ostream &OS) {
- if (Error E = writeHeaderText(OS))
- return E;
-
- yaml::Output YOS(OS);
- if (hasOutlinedHashTree())
- HashTreeRecord.serializeYAML(YOS);
-
- // TODO: Write more yaml cgdata in order
-
- return Error::success();
-}
diff --git a/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp
index 9c44eff7953a..01aaedcf7d54 100644
--- a/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/LegacyPassManager.cpp
@@ -12,7 +12,6 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
@@ -1416,8 +1415,7 @@ bool FPPassManager::runOnFunction(Function &F) {
// Store name outside of loop to avoid redundant calls.
const StringRef Name = F.getName();
- llvm::TimeTraceScope FunctionScope(
- "OptFunction", [&F]() { return demangle(F.getName().str()); });
+ llvm::TimeTraceScope FunctionScope("OptFunction", Name);
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
diff --git a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
index fc7b82d522bf..4eff2deef9ab 100644
--- a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineVerifier.h"
-#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
@@ -236,12 +235,12 @@ void printIR(raw_ostream &OS, const MachineFunction *MF) {
MF->print(OS);
}
-std::string getIRName(Any IR, bool demangled = false) {
+std::string getIRName(Any IR) {
if (unwrapIR<Module>(IR))
return "[module]";
if (const auto *F = unwrapIR<Function>(IR))
- return demangled ? demangle(F->getName()) : F->getName().str();
+ return F->getName().str();
if (const auto *C = unwrapIR<LazyCallGraph::SCC>(IR))
return C->getName();
@@ -251,7 +250,7 @@ std::string getIRName(Any IR, bool demangled = false) {
L->getHeader()->getParent()->getName().str();
if (const auto *MF = unwrapIR<MachineFunction>(IR))
- return demangled ? demangle(MF->getName()) : MF->getName().str();
+ return MF->getName().str();
llvm_unreachable("Unknown wrapped IR type");
}
@@ -1589,7 +1588,7 @@ void TimeProfilingPassesHandler::registerCallbacks(
}
void TimeProfilingPassesHandler::runBeforePass(StringRef PassID, Any IR) {
- timeTraceProfilerBegin(PassID, getIRName(IR, true));
+ timeTraceProfilerBegin(PassID, getIRName(IR));
}
void TimeProfilingPassesHandler::runAfterPass() { timeTraceProfilerEnd(); }
diff --git a/contrib/llvm-project/llvm/lib/Support/regcomp.c b/contrib/llvm-project/llvm/lib/Support/regcomp.c
index 990aef32a396..daa41eb4912e 100644
--- a/contrib/llvm-project/llvm/lib/Support/regcomp.c
+++ b/contrib/llvm-project/llvm/lib/Support/regcomp.c
@@ -278,7 +278,7 @@ static char nuls[10]; /* place to point scanner in event of error */
#else
#define DUPMAX 255
#endif
-#define INFINITY (DUPMAX + 1)
+#define REGINFINITY (DUPMAX + 1)
#ifndef NDEBUG
static int never = 0; /* for use in asserts; shuts lint up */
@@ -582,7 +582,7 @@ p_ere_exp(struct parse *p)
count2 = p_count(p);
REQUIRE(count <= count2, REG_BADBR);
} else /* single number with comma */
- count2 = INFINITY;
+ count2 = REGINFINITY;
} else /* just a single number */
count2 = count;
repeat(p, pos, count, count2);
@@ -753,7 +753,7 @@ p_simp_re(struct parse *p,
count2 = p_count(p);
REQUIRE(count <= count2, REG_BADBR);
} else /* single number with comma */
- count2 = INFINITY;
+ count2 = REGINFINITY;
} else /* just a single number */
count2 = count;
repeat(p, pos, count, count2);
@@ -1115,7 +1115,7 @@ repeat(struct parse *p,
# define N 2
# define INF 3
# define REP(f, t) ((f)*8 + (t))
-# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
+# define MAP(n) (((n) <= 1) ? (n) : ((n) == REGINFINITY) ? INF : N)
sopno copy;
if (p->error != 0) /* head off possible runaway recursion */
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 3c9b07ad45bf..c64454cc253c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1292,6 +1292,13 @@ void AArch64AsmPrinter::emitGlobalAlias(const Module &M,
StringRef ExpStr = cast<MDString>(Node->getOperand(0))->getString();
MCSymbol *ExpSym = MMI->getContext().getOrCreateSymbol(ExpStr);
MCSymbol *Sym = MMI->getContext().getOrCreateSymbol(GA.getName());
+
+ OutStreamer->beginCOFFSymbolDef(ExpSym);
+ OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ OutStreamer->endCOFFSymbolDef();
+
OutStreamer->beginCOFFSymbolDef(Sym);
OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 2bc14f9821e6..161cf24dd403 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -108,6 +108,10 @@ static bool atomicReadDroppedOnZero(unsigned Opcode) {
case AArch64::LDUMINW: case AArch64::LDUMINX:
case AArch64::LDUMINLB: case AArch64::LDUMINLH:
case AArch64::LDUMINLW: case AArch64::LDUMINLX:
+ case AArch64::SWPB: case AArch64::SWPH:
+ case AArch64::SWPW: case AArch64::SWPX:
+ case AArch64::SWPLB: case AArch64::SWPLH:
+ case AArch64::SWPLW: case AArch64::SWPLX:
return true;
}
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Features.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Features.td
index a1ae0873fc19..a4f8f8c2d962 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Features.td
@@ -438,6 +438,15 @@ def FeatureSVE2p1: ExtensionWithMArch<"sve2p1", "SVE2p1", "FEAT_SVE2p1",
def FeatureB16B16 : ExtensionWithMArch<"b16b16", "B16B16", "FEAT_SVE_B16B16",
"Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions", [FeatureBF16]>;
+// FeatureSVEB16B16 and FeatureSMEB16B16 act as aliases for {FeatureB16B16}, and
+// {FeatureB16B16, FeatureSME2} respectively. This allows LLVM-20 interfacing programs
+// that use '+sve-b16b16' and '+sme-b16b16' to compile in LLVM-19.
+def FeatureSVEB16B16 : ExtensionWithMArch<"sve-b16b16", "SVEB16B16", "FEAT_SVE_B16B16",
+ "Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions", [FeatureB16B16]>;
+
+def FeatureSMEB16B16 : ExtensionWithMArch<"sme-b16b16", "SMEB16B16", "FEAT_SME_B16B16",
+ "Enable SME2.1 ZA-targeting non-widening BFloat16 instructions", [FeatureSME2, FeatureB16B16]>;
+
def FeatureSMEF16F16 : ExtensionWithMArch<"sme-f16f16", "SMEF16F16", "FEAT_SME_F16F16",
"Enable SME non-widening Float16 instructions", [FeatureSME2]>;
@@ -778,27 +787,26 @@ def HasV8_2aOps : Architecture64<8, 2, "a", "v8.2a",
[HasV8_1aOps, FeaturePsUAO, FeaturePAN_RWV, FeatureRAS, FeatureCCPP],
!listconcat(HasV8_1aOps.DefaultExts, [FeatureRAS])>;
def HasV8_3aOps : Architecture64<8, 3, "a", "v8.3a",
- [HasV8_2aOps, FeatureRCPC, FeaturePAuth, FeatureJS, FeatureCCIDX,
- FeatureComplxNum],
+ [HasV8_2aOps, FeatureRCPC, FeaturePAuth, FeatureJS, FeatureComplxNum],
!listconcat(HasV8_2aOps.DefaultExts, [FeatureComplxNum, FeatureJS,
- FeaturePAuth, FeatureRCPC])>;
+ FeaturePAuth, FeatureRCPC, FeatureCCIDX])>;
def HasV8_4aOps : Architecture64<8, 4, "a", "v8.4a",
[HasV8_3aOps, FeatureDotProd, FeatureNV, FeatureMPAM, FeatureDIT,
FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI, FeatureFlagM,
FeatureRCPC_IMMO, FeatureLSE2],
- !listconcat(HasV8_3aOps.DefaultExts, [FeatureDotProd])>;
+ !listconcat(HasV8_3aOps.DefaultExts, [FeatureDotProd, FeatureDIT, FeatureFlagM])>;
def HasV8_5aOps : Architecture64<8, 5, "a", "v8.5a",
[HasV8_4aOps, FeatureAltFPCmp, FeatureFRInt3264, FeatureSpecRestrict,
- FeatureSSBS, FeatureSB, FeaturePredRes, FeatureCacheDeepPersist,
+ FeatureSB, FeaturePredRes, FeatureCacheDeepPersist,
FeatureBranchTargetId],
- !listconcat(HasV8_4aOps.DefaultExts, [])>;
+ !listconcat(HasV8_4aOps.DefaultExts, [FeaturePredRes, FeatureSSBS, FeatureBranchTargetId, FeatureSB])>;
def HasV8_6aOps : Architecture64<8, 6, "a", "v8.6a",
[HasV8_5aOps, FeatureAMVS, FeatureBF16, FeatureFineGrainedTraps,
FeatureEnhancedCounterVirtualization, FeatureMatMulInt8],
!listconcat(HasV8_5aOps.DefaultExts, [FeatureBF16, FeatureMatMulInt8])>;
def HasV8_7aOps : Architecture64<8, 7, "a", "v8.7a",
[HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX],
- !listconcat(HasV8_6aOps.DefaultExts, [])>;
+ !listconcat(HasV8_6aOps.DefaultExts, [FeatureWFxT])>;
def HasV8_8aOps : Architecture64<8, 8, "a", "v8.8a",
[HasV8_7aOps, FeatureHBC, FeatureMOPS, FeatureNMI],
!listconcat(HasV8_7aOps.DefaultExts, [FeatureMOPS, FeatureHBC])>;
@@ -816,7 +824,7 @@ def HasV9_1aOps : Architecture64<9, 1, "a", "v9.1a",
!listconcat(HasV9_0aOps.DefaultExts, [FeatureBF16, FeatureMatMulInt8, FeatureRME])>;
def HasV9_2aOps : Architecture64<9, 2, "a", "v9.2a",
[HasV8_7aOps, HasV9_1aOps],
- !listconcat(HasV9_1aOps.DefaultExts, [FeatureMEC])>;
+ !listconcat(HasV9_1aOps.DefaultExts, [FeatureMEC, FeatureWFxT])>;
def HasV9_3aOps : Architecture64<9, 3, "a", "v9.3a",
[HasV8_8aOps, HasV9_2aOps],
!listconcat(HasV9_2aOps.DefaultExts, [FeatureMOPS, FeatureHBC])>;
@@ -833,7 +841,7 @@ def HasV8_0rOps : Architecture64<8, 0, "r", "v8r",
//v8.2
FeatureRAS, FeaturePsUAO, FeatureCCPP, FeaturePAN_RWV,
//v8.3
- FeatureCCIDX, FeaturePAuth, FeatureRCPC,
+ FeaturePAuth, FeatureRCPC,
//v8.4
FeatureTRACEV8_4, FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2,
FeatureRCPC_IMMO,
@@ -844,7 +852,7 @@ def HasV8_0rOps : Architecture64<8, 0, "r", "v8r",
// For v8-R, we do not enable crypto and align with GCC that enables a more
// minimal set of optional architecture extensions.
!listconcat(
- !listremove(HasV8_5aOps.DefaultExts, [FeatureLSE]),
+ !listremove(HasV8_5aOps.DefaultExts, [FeatureBranchTargetId, FeaturePredRes]),
[FeatureSSBS, FeatureFullFP16, FeatureFP16FML, FeatureSB]
)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index bd530903bb66..ba46ededc63a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -240,6 +240,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -275,6 +276,10 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
// Stack hazard padding size. 0 = disabled.
static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
cl::init(0), cl::Hidden);
+// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
+static cl::opt<unsigned>
+ StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
+ cl::Hidden);
// Whether to insert padding into non-streaming functions (for testing).
static cl::opt<bool>
StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
@@ -2615,9 +2620,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
+ StackOffset SVEStackSize = getSVEStackSize(MF);
+
+ // For VLA-area objects, just emit an offset at the end of the stack frame.
+ // Whilst not quite correct, these objects do live at the end of the frame and
+ // so it is more useful for analysis for the offset to reflect this.
+ if (MFI.isVariableSizedObjectIndex(FI)) {
+ return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize;
+ }
// This is correct in the absence of any SVE stack objects.
- StackOffset SVEStackSize = getSVEStackSize(MF);
if (!SVEStackSize)
return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
@@ -3528,13 +3540,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
return true;
}
-// Return the FrameID for a Load/Store instruction by looking at the MMO.
-static std::optional<int> getLdStFrameID(const MachineInstr &MI,
- const MachineFrameInfo &MFI) {
- if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
- return std::nullopt;
-
- MachineMemOperand *MMO = *MI.memoperands_begin();
+// Return the FrameID for a MMO.
+static std::optional<int> getMMOFrameID(MachineMemOperand *MMO,
+ const MachineFrameInfo &MFI) {
auto *PSV =
dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
if (PSV)
@@ -3552,6 +3560,15 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
return std::nullopt;
}
+// Return the FrameID for a Load/Store instruction by looking at the first MMO.
+static std::optional<int> getLdStFrameID(const MachineInstr &MI,
+ const MachineFrameInfo &MFI) {
+ if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
+ return std::nullopt;
+
+ return getMMOFrameID(*MI.memoperands_begin(), MFI);
+}
+
// Check if a Hazard slot is needed for the current function, and if so create
// one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
// which can be used to determine if any hazard padding is needed.
@@ -5029,3 +5046,174 @@ void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
MI->eraseFromParent();
}
}
+
+struct StackAccess {
+ enum AccessType {
+ NotAccessed = 0, // Stack object not accessed by load/store instructions.
+ GPR = 1 << 0, // A general purpose register.
+ PPR = 1 << 1, // A predicate register.
+ FPR = 1 << 2, // A floating point/Neon/SVE register.
+ };
+
+ int Idx;
+ StackOffset Offset;
+ int64_t Size;
+ unsigned AccessTypes;
+
+ StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}
+
+ bool operator<(const StackAccess &Rhs) const {
+ return std::make_tuple(start(), Idx) <
+ std::make_tuple(Rhs.start(), Rhs.Idx);
+ }
+
+ bool isCPU() const {
+ // Predicate register load and store instructions execute on the CPU.
+ return AccessTypes & (AccessType::GPR | AccessType::PPR);
+ }
+ bool isSME() const { return AccessTypes & AccessType::FPR; }
+ bool isMixed() const { return isCPU() && isSME(); }
+
+ int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
+ int64_t end() const { return start() + Size; }
+
+ std::string getTypeString() const {
+ switch (AccessTypes) {
+ case AccessType::FPR:
+ return "FPR";
+ case AccessType::PPR:
+ return "PPR";
+ case AccessType::GPR:
+ return "GPR";
+ case AccessType::NotAccessed:
+ return "NA";
+ default:
+ return "Mixed";
+ }
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << getTypeString() << " stack object at [SP"
+ << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
+ if (Offset.getScalable())
+ OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
+ << " * vscale";
+ OS << "]";
+ }
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
+ SA.print(OS);
+ return OS;
+}
+
+void AArch64FrameLowering::emitRemarks(
+ const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
+
+ SMEAttrs Attrs(MF.getFunction());
+ if (Attrs.hasNonStreamingInterfaceAndBody())
+ return;
+
+ const uint64_t HazardSize =
+ (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
+
+ if (HazardSize == 0)
+ return;
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ // Bail if function has no stack objects.
+ if (!MFI.hasStackObjects())
+ return;
+
+ std::vector<StackAccess> StackAccesses(MFI.getNumObjects());
+
+ size_t NumFPLdSt = 0;
+ size_t NumNonFPLdSt = 0;
+
+ // Collect stack accesses via Load/Store instructions.
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
+ continue;
+ for (MachineMemOperand *MMO : MI.memoperands()) {
+ std::optional<int> FI = getMMOFrameID(MMO, MFI);
+ if (FI && !MFI.isDeadObjectIndex(*FI)) {
+ int FrameIdx = *FI;
+
+ size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
+ if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
+ StackAccesses[ArrIdx].Idx = FrameIdx;
+ StackAccesses[ArrIdx].Offset =
+ getFrameIndexReferenceFromSP(MF, FrameIdx);
+ StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
+ }
+
+ unsigned RegTy = StackAccess::AccessType::GPR;
+ if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
+ if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
+ RegTy = StackAccess::PPR;
+ else
+ RegTy = StackAccess::FPR;
+ } else if (AArch64InstrInfo::isFpOrNEON(MI)) {
+ RegTy = StackAccess::FPR;
+ }
+
+ StackAccesses[ArrIdx].AccessTypes |= RegTy;
+
+ if (RegTy == StackAccess::FPR)
+ ++NumFPLdSt;
+ else
+ ++NumNonFPLdSt;
+ }
+ }
+ }
+ }
+
+ if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
+ return;
+
+ llvm::sort(StackAccesses);
+ StackAccesses.erase(llvm::remove_if(StackAccesses,
+ [](const StackAccess &S) {
+ return S.AccessTypes ==
+ StackAccess::NotAccessed;
+ }),
+ StackAccesses.end());
+
+ SmallVector<const StackAccess *> MixedObjects;
+ SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
+
+ if (StackAccesses.front().isMixed())
+ MixedObjects.push_back(&StackAccesses.front());
+
+ for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
+ It != End; ++It) {
+ const auto &First = *It;
+ const auto &Second = *(It + 1);
+
+ if (Second.isMixed())
+ MixedObjects.push_back(&Second);
+
+ if ((First.isSME() && Second.isCPU()) ||
+ (First.isCPU() && Second.isSME())) {
+ uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
+ if (Distance < HazardSize)
+ HazardPairs.emplace_back(&First, &Second);
+ }
+ }
+
+ auto EmitRemark = [&](llvm::StringRef Str) {
+ ORE->emit([&]() {
+ auto R = MachineOptimizationRemarkAnalysis(
+ "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
+ return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
+ });
+ };
+
+ for (const auto &P : HazardPairs)
+ EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());
+
+ for (const auto *Obj : MixedObjects)
+ EmitRemark(
+ formatv("{0} accessed by both GP and FP instructions", *Obj).str());
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 0ebab1700e9c..c19731249620 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -13,8 +13,9 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
-#include "llvm/Support/TypeSize.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
@@ -178,6 +179,9 @@ private:
inlineStackProbeLoopExactMultiple(MachineBasicBlock::iterator MBBI,
int64_t NegProbeSize,
Register TargetReg) const;
+
+ void emitRemarks(const MachineFunction &MF,
+ MachineOptimizationRemarkEmitter *ORE) const override;
};
} // End llvm namespace
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6d413a09407a..62078822c89b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17719,6 +17719,9 @@ static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
// and generate vecreduce.add(concat_vector(DOT, DOT2, ..)).
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *ST) {
+ if (!ST->isNeonAvailable())
+ return SDValue();
+
if (!ST->hasDotProd())
return performVecReduceAddCombineWithUADDLP(N, DAG);
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td
index 71384a23c49a..6df87fc6a815 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -688,6 +688,7 @@ def ProcessorFeatures {
FeatureMatMulInt8, FeatureBF16, FeatureAM,
FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
FeatureFP16FML,
+ FeatureCCIDX,
FeatureSB, FeaturePAuth, FeatureSSBS, FeatureSVE, FeatureSVE2,
FeatureComplxNum, FeatureCRC, FeatureDotProd,
FeatureFPARMv8,FeatureFullFP16, FeatureJS, FeatureLSE,
@@ -695,6 +696,7 @@ def ProcessorFeatures {
list<SubtargetFeature> A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM,
FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
FeatureFP16FML,
+ FeatureCCIDX,
FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes,
FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC,
FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS,
@@ -703,6 +705,7 @@ def ProcessorFeatures {
list<SubtargetFeature> A520AE = [HasV9_2aOps, FeaturePerfMon, FeatureAM,
FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
FeatureFP16FML,
+ FeatureCCIDX,
FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes,
FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC,
FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS,
@@ -734,12 +737,14 @@ def ProcessorFeatures {
FeaturePerfMon, FeatureRCPC, FeatureSPE,
FeatureSSBS, FeatureCRC, FeatureLSE, FeatureRAS, FeatureRDM];
list<SubtargetFeature> A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
+ FeatureCCIDX, FeatureSSBS,
FeatureETE, FeatureMTE, FeatureFP16FML,
FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8,
FeaturePAuth, FeatureFlagM, FeatureSB, FeatureSVE, FeatureSVE2,
FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8,
FeatureFullFP16, FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM];
list<SubtargetFeature> A715 = [HasV9_0aOps, FeatureNEON, FeatureMTE,
+ FeatureCCIDX,
FeatureFP16FML, FeatureSVE, FeatureTRBE,
FeatureSVE2BitPerm, FeatureBF16, FeatureETE,
FeaturePerfMon, FeatureMatMulInt8, FeatureSPE,
@@ -749,6 +754,7 @@ def ProcessorFeatures {
FeatureJS, FeatureLSE, FeatureRAS,
FeatureRCPC, FeatureRDM];
list<SubtargetFeature> A720 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,
+ FeatureCCIDX,
FeatureTRBE, FeatureSVE2BitPerm, FeatureETE,
FeaturePerfMon, FeatureSPE, FeatureSPE_EEF,
FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes,
@@ -757,6 +763,7 @@ def ProcessorFeatures {
FeatureJS, FeatureLSE, FeatureNEON, FeatureRAS,
FeatureRCPC, FeatureRDM];
list<SubtargetFeature> A720AE = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,
+ FeatureCCIDX,
FeatureTRBE, FeatureSVE2BitPerm, FeatureETE,
FeaturePerfMon, FeatureSPE, FeatureSPE_EEF,
FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes,
@@ -765,6 +772,7 @@ def ProcessorFeatures {
FeatureJS, FeatureLSE, FeatureNEON, FeatureRAS,
FeatureRCPC, FeatureRDM];
list<SubtargetFeature> A725 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,
+ FeatureCCIDX,
FeatureETE, FeaturePerfMon, FeatureSPE,
FeatureSVE2BitPerm, FeatureSPE_EEF, FeatureTRBE,
FeatureFlagM, FeaturePredRes, FeatureSB, FeatureSSBS,
@@ -800,6 +808,7 @@ def ProcessorFeatures {
FeatureMatMulInt8, FeatureBF16, FeatureAM,
FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
FeatureFP16FML,
+ FeatureCCIDX,
FeaturePAuth, FeatureSSBS, FeatureSB, FeatureSVE, FeatureSVE2, FeatureFlagM,
FeatureComplxNum, FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureFullFP16,
FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM];
@@ -808,6 +817,7 @@ def ProcessorFeatures {
FeatureSPE, FeatureBF16, FeatureMatMulInt8,
FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16,
FeatureFP16FML,
+ FeatureCCIDX,
FeatureSB, FeaturePAuth, FeaturePredRes, FeatureFlagM, FeatureSSBS,
FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS,
FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureDotProd];
@@ -815,11 +825,13 @@ def ProcessorFeatures {
FeaturePerfMon, FeatureETE, FeatureTRBE,
FeatureSPE, FeatureMTE, FeatureSVE2BitPerm,
FeatureFP16FML, FeatureSPE_EEF,
+ FeatureCCIDX,
FeatureSB, FeatureSSBS, FeaturePAuth, FeatureFlagM, FeaturePredRes,
FeatureSVE, FeatureSVE2, FeatureComplxNum, FeatureCRC, FeatureDotProd,
FeatureFPARMv8, FeatureFullFP16, FeatureMatMulInt8, FeatureJS, FeatureLSE,
FeatureNEON, FeatureRAS, FeatureRCPC, FeatureRDM, FeatureBF16];
list<SubtargetFeature> X925 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,
+ FeatureCCIDX,
FeatureETE, FeaturePerfMon, FeatureSPE,
FeatureSVE2BitPerm, FeatureSPE_EEF, FeatureTRBE,
FeatureFlagM, FeaturePredRes, FeatureSB, FeatureSSBS,
@@ -863,23 +875,26 @@ def ProcessorFeatures {
list<SubtargetFeature> AppleA15 = [HasV8_6aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML,
- FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE,
- FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM,
- FeatureBF16, FeatureDotProd, FeatureMatMulInt8];
+ FeatureComplxNum, FeatureCRC, FeatureJS,
+ FeatureLSE, FeaturePAuth,
+ FeatureRAS, FeatureRCPC, FeatureRDM,
+ FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS];
list<SubtargetFeature> AppleA16 = [HasV8_6aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML,
FeatureHCX,
- FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE,
- FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM,
- FeatureBF16, FeatureDotProd, FeatureMatMulInt8];
+ FeatureComplxNum, FeatureCRC, FeatureJS,
+ FeatureLSE, FeaturePAuth,
+ FeatureRAS, FeatureRCPC, FeatureRDM,
+ FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS];
list<SubtargetFeature> AppleA17 = [HasV8_6aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML,
FeatureHCX,
- FeatureComplxNum, FeatureCRC, FeatureJS, FeatureLSE,
- FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM,
- FeatureBF16, FeatureDotProd, FeatureMatMulInt8];
+ FeatureComplxNum, FeatureCRC, FeatureJS,
+ FeatureLSE, FeaturePAuth,
+ FeatureRAS, FeatureRCPC, FeatureRDM,
+ FeatureBF16, FeatureDotProd, FeatureMatMulInt8, FeatureSSBS];
list<SubtargetFeature> AppleM4 = [HasV9_2aOps, FeatureSHA2, FeatureFPARMv8,
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML,
@@ -909,6 +924,7 @@ def ProcessorFeatures {
FeatureMatMulInt8, FeatureMTE, FeatureSVE2,
FeatureSVE2BitPerm, FeatureTRBE,
FeaturePerfMon,
+ FeatureCCIDX,
FeatureDotProd, FeatureFullFP16, FeatureSB, FeatureSSBS, FeatureSVE,
FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS, FeatureLSE,
FeatureNEON, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM];
@@ -916,6 +932,7 @@ def ProcessorFeatures {
FeatureFullFP16, FeatureMTE, FeaturePerfMon,
FeatureRandGen, FeatureSPE, FeatureSPE_EEF,
FeatureSVE2BitPerm,
+ FeatureCCIDX,
FeatureSSBS, FeatureSB, FeaturePredRes, FeaturePAuth, FeatureFlagM,
FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum,
FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8,
@@ -926,6 +943,7 @@ def ProcessorFeatures {
FeatureFullFP16, FeatureMatMulInt8, FeatureNEON,
FeaturePerfMon, FeatureRandGen, FeatureSPE,
FeatureSSBS, FeatureSVE,
+ FeatureCCIDX,
FeatureSHA3, FeatureSM4, FeatureDotProd, FeatureComplxNum,
FeatureCRC, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS,
FeatureRCPC, FeatureRDM];
@@ -934,6 +952,7 @@ def ProcessorFeatures {
FeatureFullFP16, FeatureMatMulInt8, FeatureNEON,
FeaturePerfMon, FeatureRandGen, FeatureSPE,
FeatureSSBS, FeatureSVE,
+ FeatureCCIDX,
FeatureSHA3, FeatureSM4, FeatureDotProd, FeatureComplxNum,
FeatureCRC, FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS,
FeatureRCPC, FeatureRDM];
@@ -941,12 +960,14 @@ def ProcessorFeatures {
FeaturePerfMon, FeatureETE, FeatureMatMulInt8,
FeatureNEON, FeatureSVE2BitPerm, FeatureFP16FML,
FeatureMTE, FeatureRandGen,
+ FeatureCCIDX,
FeatureSVE, FeatureSVE2, FeatureSSBS, FeatureFullFP16, FeatureDotProd,
FeatureComplxNum, FeatureCRC, FeatureFPARMv8, FeatureJS, FeatureLSE,
FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM];
list<SubtargetFeature> NeoverseV3 = [HasV9_2aOps, FeatureETE, FeatureFP16FML,
FeatureFullFP16, FeatureLS64, FeatureMTE,
FeaturePerfMon, FeatureRandGen, FeatureSPE,
+ FeatureCCIDX,
FeatureSPE_EEF, FeatureSVE2BitPerm, FeatureBRBE,
FeatureSSBS, FeatureSB, FeaturePredRes, FeaturePAuth, FeatureFlagM,
FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC,
@@ -957,12 +978,14 @@ def ProcessorFeatures {
FeaturePerfMon, FeatureRandGen, FeatureSPE,
FeatureSPE_EEF, FeatureSVE2BitPerm, FeatureBRBE,
FeatureSSBS, FeatureSB, FeaturePredRes, FeaturePAuth, FeatureFlagM,
+ FeatureCCIDX,
FeatureSVE, FeatureSVE2, FeatureBF16, FeatureComplxNum, FeatureCRC,
FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, FeatureJS,
FeatureLSE, FeatureNEON, FeatureRAS, FeatureRCPC, FeatureRDM,
FeatureRME];
list<SubtargetFeature> Saphira = [HasV8_4aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
FeatureNEON, FeatureSPE, FeaturePerfMon, FeatureCRC,
+ FeatureCCIDX,
FeatureLSE, FeatureRDM, FeatureRAS, FeatureRCPC];
list<SubtargetFeature> ThunderX = [HasV8_0aOps, FeatureCRC, FeatureSHA2, FeatureAES,
FeatureFPARMv8, FeaturePerfMon, FeatureNEON];
@@ -971,6 +994,7 @@ def ProcessorFeatures {
FeatureRDM];
list<SubtargetFeature> ThunderX3T110 = [HasV8_3aOps, FeatureCRC, FeatureSHA2, FeatureAES,
FeatureFPARMv8, FeatureNEON, FeatureLSE,
+ FeatureCCIDX,
FeaturePAuth, FeaturePerfMon, FeatureComplxNum,
FeatureJS, FeatureRAS, FeatureRCPC, FeatureRDM];
list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
@@ -983,6 +1007,7 @@ def ProcessorFeatures {
FeatureSHA2, FeatureSHA3, FeatureAES,
FeatureFullFP16, FeatureBF16, FeatureComplxNum, FeatureCRC,
FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, FeatureJS,
+ FeatureCCIDX,
FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC, FeatureRDM];
list<SubtargetFeature> Ampere1A = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
FeatureMTE, FeatureSSBS, FeatureRandGen,
@@ -991,6 +1016,7 @@ def ProcessorFeatures {
FeatureFullFP16, FeatureBF16, FeatureComplxNum,
FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8,
FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC,
+ FeatureCCIDX,
FeatureRDM];
list<SubtargetFeature> Ampere1B = [HasV8_7aOps, FeatureNEON, FeaturePerfMon,
FeatureMTE, FeatureSSBS, FeatureRandGen,
@@ -999,6 +1025,7 @@ def ProcessorFeatures {
FeatureWFxT, FeatureFullFP16, FeatureBF16, FeatureComplxNum,
FeatureCRC, FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8,
FeatureJS, FeatureLSE, FeaturePAuth, FeatureRAS, FeatureRCPC,
+ FeatureCCIDX,
FeatureRDM];
list<SubtargetFeature> Oryon = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
@@ -1007,6 +1034,7 @@ def ProcessorFeatures {
FeatureSHA3, FeatureAES,
FeatureSPE, FeatureBF16, FeatureComplxNum, FeatureCRC,
FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8,
+ FeatureSSBS, FeatureCCIDX,
FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM];
// ETE and TRBE are future architecture extensions. We temporarily enable them
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 45148449dfb8..39fba6a257bb 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -254,7 +254,8 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
return false;
if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
- CallerAttrs.requiresSMChange(CalleeAttrs)) {
+ CallerAttrs.requiresSMChange(CalleeAttrs) ||
+ CallerAttrs.requiresPreservingZT0(CalleeAttrs)) {
if (hasPossibleIncompatibleOps(Callee))
return false;
}
@@ -540,7 +541,15 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
InstructionCost
AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
+ // The code-generator is currently not able to handle scalable vectors
+ // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
+ // it. This change will be removed when code-generation for these types is
+ // sufficiently reliable.
auto *RetTy = ICA.getReturnType();
+ if (auto *VTy = dyn_cast<ScalableVectorType>(RetTy))
+ if (VTy->getElementCount() == ElementCount::getScalable(1))
+ return InstructionCost::getInvalid();
+
switch (ICA.getID()) {
case Intrinsic::experimental_vector_histogram_add:
if (!ST->hasSVE2())
@@ -2295,6 +2304,11 @@ std::optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
return std::nullopt;
}
+bool AArch64TTIImpl::enableScalableVectorization() const {
+ return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
+ EnableScalableAutovecInStreamingMode);
+}
+
TypeSize
AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
switch (K) {
@@ -3018,6 +3032,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
ArrayRef<const Value *> Args,
const Instruction *CxtI) {
+ // The code-generator is currently not able to handle scalable vectors
+ // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
+ // it. This change will be removed when code-generation for these types is
+ // sufficiently reliable.
+ if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
+ if (VTy->getElementCount() == ElementCount::getScalable(1))
+ return InstructionCost::getInvalid();
+
// TODO: Handle more cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
@@ -3792,6 +3814,14 @@ InstructionCost
AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
FastMathFlags FMF,
TTI::TargetCostKind CostKind) {
+ // The code-generator is currently not able to handle scalable vectors
+ // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
+ // it. This change will be removed when code-generation for these types is
+ // sufficiently reliable.
+ if (auto *VTy = dyn_cast<ScalableVectorType>(Ty))
+ if (VTy->getElementCount() == ElementCount::getScalable(1))
+ return InstructionCost::getInvalid();
+
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
@@ -3836,6 +3866,14 @@ InstructionCost
AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {
+ // The code-generator is currently not able to handle scalable vectors
+ // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting
+ // it. This change will be removed when code-generation for these types is
+ // sufficiently reliable.
+ if (auto *VTy = dyn_cast<ScalableVectorType>(ValTy))
+ if (VTy->getElementCount() == ElementCount::getScalable(1))
+ return InstructionCost::getInvalid();
+
if (TTI::requiresOrderedReduction(FMF)) {
if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
InstructionCost BaseCost =
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a9189fd53f40..4a6457d7a7db 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -381,7 +381,7 @@ public:
return ST->isSVEorStreamingSVEAvailable();
}
- bool enableScalableVectorization() const { return ST->isSVEAvailable(); }
+ bool enableScalableVectorization() const;
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9b2cab2eb73a..32ecf350db59 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1581,7 +1581,18 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
// Clamp is applied after omod, so it is OK if omod is set.
DefClamp->setImm(1);
- MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
+
+ Register DefReg = Def->getOperand(0).getReg();
+ Register MIDstReg = MI.getOperand(0).getReg();
+ if (TRI->isSGPRReg(*MRI, DefReg)) {
+ // Pseudo scalar instructions have a SGPR for dst and clamp is a v_max*
+ // instruction with a VGPR dst.
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
+ MIDstReg)
+ .addReg(DefReg);
+ } else {
+ MRI->replaceRegWith(MIDstReg, DefReg);
+ }
MI.eraseFromParent();
// Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
index 9cc162d041f4..883808ae981f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -50,6 +50,13 @@
using namespace llvm;
+// Whether Big-endian GISel is enabled, defaults to off, can be enabled for
+// testing.
+static cl::opt<bool>
+ EnableGISelBigEndian("enable-arm-gisel-bigendian", cl::Hidden,
+ cl::init(false),
+ cl::desc("Enable Global-ISel Big Endian Lowering"));
+
ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
: CallLowering(&TLI) {}
@@ -539,3 +546,5 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
return true;
}
+
+bool ARMCallLowering::enableBigEndian() const { return EnableGISelBigEndian; } \ No newline at end of file
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h
index 38095617fb4f..32c95a044d7b 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -42,6 +42,8 @@ public:
bool lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const override;
+ bool enableBigEndian() const override;
+
private:
bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
ArrayRef<Register> VRegs,
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index f0933765bbcb..86ce6b4e05ed 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -1223,6 +1223,10 @@ void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) {
if (ER.Kind == MachineOperand::MO_GlobalAddress)
if (ER.V.GV->getName().empty())
return;
+ // Ignore block address that points to block in another function
+ if (ER.Kind == MachineOperand::MO_BlockAddress)
+ if (ER.V.BA->getFunction() != &(MI.getMF()->getFunction()))
+ return;
Extenders.push_back(ED);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp
index bd8ef43da625..64a0e9321598 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -1608,8 +1608,8 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
}
emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8);
emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8);
- emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]);
- emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF);
+ emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[1]).addImm(0xFF);
+ emitInst(Mips::OR, DestReg).addReg(TempReg[0]).addReg(TempReg[2]);
updateValueMap(II, DestReg);
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 1963582ce686..a57ed33bda9c 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1007,7 +1007,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// R0 cannot be used as a base register, but it can be used as an
// index in a store-indexed.
int LastOffset = 0;
- if (HasFP) {
+ if (HasFP) {
// R0 += (FPOffset-LastOffset).
// Need addic, since addi treats R0 as 0.
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
@@ -2025,8 +2025,18 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
// code. Same goes for the base pointer and the PIC base register.
if (needsFP(MF))
SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
- if (RegInfo->hasBasePointer(MF))
+ if (RegInfo->hasBasePointer(MF)) {
SavedRegs.reset(RegInfo->getBaseRegister(MF));
+ // On AIX, when BaseRegister(R30) is used, need to spill r31 too to match
+ // AIX trackback table requirement.
+ if (!needsFP(MF) && !SavedRegs.test(isPPC64 ? PPC::X31 : PPC::R31) &&
+ Subtarget.isAIXABI()) {
+ assert(
+ (RegInfo->getBaseRegister(MF) == (isPPC64 ? PPC::X30 : PPC::R30)) &&
+ "Invalid base register on AIX!");
+ SavedRegs.set(isPPC64 ? PPC::X31 : PPC::R31);
+ }
+ }
if (FI->usesPICBase())
SavedRegs.reset(PPC::R30);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 0a66a38f6d5a..be2e880ecd3a 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -187,25 +187,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
auto *VTy = cast<VectorType>(II.getType());
IRBuilder<> Builder(&II);
-
- // Extend VL from i32 to XLen if needed.
- if (ST->is64Bit())
- VL = Builder.CreateZExt(VL, Builder.getInt64Ty());
-
Type *STy = VTy->getElementType();
Value *Val = Builder.CreateLoad(STy, BasePtr);
- const auto &TLI = *ST->getTargetLowering();
- Value *Res;
-
- // TODO: Also support fixed/illegal vector types to splat with evl = vl.
- if (isa<ScalableVectorType>(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) {
- unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
- : Intrinsic::riscv_vmv_v_x;
- Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
- {PoisonValue::get(VTy), Val, VL});
- } else {
- Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val);
- }
+ Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
+ {Val, II.getOperand(2), VL});
II.replaceAllUsesWith(Res);
II.eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 809be499ee0f..9d2990c98ce2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -171,7 +171,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
// If the memcpy has metadata describing the members, see if we can get the
- // TBAA tag describing our copy.
+ // TBAA, scope and noalias tags describing our copy.
AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
Value *Src = MI->getArgOperand(1);