author    Dimitry Andric <dim@FreeBSD.org>    2024-01-24 19:11:41 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2024-01-24 19:11:41 +0000
commit    4df029cc74e5ec124f14a5682e44999ce4f086df (patch)
tree      fa2e8720472930df97920b4185215c910159f10d /llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
parent    950076cd18f3fa9d789b4add9d405898efff09a5 (diff)
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp')
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 69
1 file changed, 59 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 13b5e578391d..d611338fc268 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -55,6 +55,9 @@ static cl::opt<unsigned> InlineCallPenaltyChangeSM(
"inline-call-penalty-sm-change", cl::init(10), cl::Hidden,
cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"));
+static cl::opt<bool> EnableOrLikeSelectOpt("enable-aarch64-or-like-select",
+ cl::init(true), cl::Hidden);
+
namespace {
class TailFoldingOption {
// These bitfields will only ever be set to something non-zero in operator=,
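Note: the EnableOrLikeSelectOpt option added above is a standard hidden LLVM cl::opt. It defaults to true and, being cl::Hidden, is listed only by -help-hidden; in a typical build a tool that links the AArch64 backend (llc, for instance) should accept -enable-aarch64-or-like-select=false. For readers unfamiliar with the pattern, a minimal self-contained sketch follows; the flag name and the main() driver are illustrative only, not part of the patch.

// Minimal sketch of the cl::opt pattern used by the patch; the flag name,
// description and driver below are illustrative only.
#include "llvm/Support/CommandLine.h"

using namespace llvm;

static cl::opt<bool> ExampleHiddenFlag(
    "example-hidden-flag",              // spelled -example-hidden-flag on the command line
    cl::init(true),                     // default value when the flag is not given
    cl::Hidden,                         // omitted from -help, listed by -help-hidden
    cl::desc("Illustrative hidden boolean option"));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  return ExampleHiddenFlag ? 0 : 1;
}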
@@ -236,8 +239,9 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
return false;
if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
- CallerAttrs.requiresSMChange(CalleeAttrs,
- /*BodyOverridesInterface=*/true)) {
+ (CallerAttrs.requiresSMChange(CalleeAttrs) &&
+ (!CallerAttrs.hasStreamingInterfaceOrBody() ||
+ !CalleeAttrs.hasStreamingBody()))) {
if (hasPossibleIncompatibleOps(Callee))
return false;
}
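Note: this hunk tightens the SME inlining guard. The hasPossibleIncompatibleOps() scan of the callee is now triggered only when a lazy ZA save is required, or when the call would change PSTATE.SM and it is not the case that a streaming caller is inlining a callee with a streaming body. A hedged restatement of the predicate as a standalone truth function; the struct and helper are stand-ins, not LLVM API, and plain bools replace the SMEAttrs queries used in the diff.

// Illustrative restatement of the guard above (not LLVM API).
struct InlineSMInfo {
  bool RequiresLazySave;           // callee needs a lazy ZA save around the call
  bool RequiresSMChange;           // inlining would otherwise require a PSTATE.SM switch
  bool CallerStreamingIfaceOrBody; // caller has a streaming interface or body
  bool CalleeStreamingBody;        // callee body is streaming
};

static bool mustScanCalleeForIncompatibleOps(const InlineSMInfo &I) {
  return I.RequiresLazySave ||
         (I.RequiresSMChange &&
          (!I.CallerStreamingIfaceOrBody || !I.CalleeStreamingBody));
}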
@@ -3176,14 +3180,47 @@ InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
if (Ty->isPtrOrPtrVectorTy())
return LT.first;
- // Check truncating stores and extending loads.
- if (useNeonVector(Ty) &&
- Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
- // v4i8 types are lowered to a scalar load/store and sshll/xtn.
- if (VT == MVT::v4i8)
- return 2;
- // Otherwise we need to scalarize.
- return cast<FixedVectorType>(Ty)->getNumElements() * 2;
+ if (useNeonVector(Ty)) {
+ // Check truncating stores and extending loads.
+ if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
+ // v4i8 types are lowered to a scalar load/store and sshll/xtn.
+ if (VT == MVT::v4i8)
+ return 2;
+ // Otherwise we need to scalarize.
+ return cast<FixedVectorType>(Ty)->getNumElements() * 2;
+ }
+ EVT EltVT = VT.getVectorElementType();
+ unsigned EltSize = EltVT.getScalarSizeInBits();
+ if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
+ VT.getVectorNumElements() >= (128 / EltSize) || !Alignment ||
+ *Alignment != Align(1))
+ return LT.first;
+ // FIXME: v3i8 lowering currently is very inefficient, due to automatic
+ // widening to v4i8, which produces suboptimal results.
+ if (VT.getVectorNumElements() == 3 && EltVT == MVT::i8)
+ return LT.first;
+
+ // Check non-power-of-2 loads/stores for legal vector element types with
+ // NEON. Non-power-of-2 memory ops will get broken down to a set of
+ // operations on smaller power-of-2 ops, including ld1/st1.
+ LLVMContext &C = Ty->getContext();
+ InstructionCost Cost(0);
+ SmallVector<EVT> TypeWorklist;
+ TypeWorklist.push_back(VT);
+ while (!TypeWorklist.empty()) {
+ EVT CurrVT = TypeWorklist.pop_back_val();
+ unsigned CurrNumElements = CurrVT.getVectorNumElements();
+ if (isPowerOf2_32(CurrNumElements)) {
+ Cost += 1;
+ continue;
+ }
+
+ unsigned PrevPow2 = NextPowerOf2(CurrNumElements) / 2;
+ TypeWorklist.push_back(EVT::getVectorVT(C, EltVT, PrevPow2));
+ TypeWorklist.push_back(
+ EVT::getVectorVT(C, EltVT, CurrNumElements - PrevPow2));
+ }
+ return Cost;
}
return LT.first;
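Note: this hunk keeps the existing truncating-store/extending-load costs and, in addition, prices align-1 non-power-of-2 NEON loads and stores (with a legal element size and fewer elements than a full 128-bit register would hold) by splitting the element count into power-of-2 pieces, each assumed to lower to one ld1/st1-style operation. A standalone sketch of that decomposition with a couple of worked examples; plain unsigned arithmetic replaces the EVT worklist of the real code.

// Illustrative model of the power-of-two decomposition used above.
#include <cassert>
#include <vector>

static unsigned nonPow2MemOpCost(unsigned NumElements) {
  unsigned Cost = 0;
  std::vector<unsigned> Worklist{NumElements};
  while (!Worklist.empty()) {
    unsigned N = Worklist.back();
    Worklist.pop_back();
    if ((N & (N - 1)) == 0) { // power of two: one ld1/st1-style operation
      ++Cost;
      continue;
    }
    // Split off the largest power of two below N and keep decomposing the rest.
    unsigned Pow2 = 1;
    while (Pow2 * 2 < N)
      Pow2 *= 2;
    Worklist.push_back(Pow2);
    Worklist.push_back(N - Pow2);
  }
  return Cost;
}

int main() {
  assert(nonPow2MemOpCost(7) == 3); // 7 -> 4 + 3 -> 4 + (2 + 1)
  assert(nonPow2MemOpCost(6) == 2); // 6 -> 4 + 2
  return 0;
}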
@@ -4014,3 +4051,15 @@ AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
return AM.Scale != 0 && AM.Scale != 1;
return -1;
}
+
+bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) {
+ // For the binary operators (e.g. or) we need to be more careful than
+ // selects; here we only transform them if they are already at a natural
+ // break point in the code - the end of a block with an unconditional
+ // terminator.
+ if (EnableOrLikeSelectOpt && I->getOpcode() == Instruction::Or &&
+ isa<BranchInst>(I->getNextNode()) &&
+ cast<BranchInst>(I->getNextNode())->isUnconditional())
+ return true;
+ return BaseT::shouldTreatInstructionLikeSelect(I);
+}
\ No newline at end of file
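Note: the final hunk adds the shouldTreatInstructionLikeSelect override. An 'or' is treated like a select only while the enable-aarch64-or-like-select flag is on and only when it sits at a natural break point, i.e. immediately before an unconditional branch terminator. A small, hedged helper that mirrors the structural check; it is a standalone sketch with an explicit null guard, not the TTI hook itself.

// Illustrative check mirroring the pattern the hook looks for: an 'or' whose
// next instruction is an unconditional branch terminator.
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

static bool isOrAtBlockEnd(const llvm::Instruction *I) {
  if (I->getOpcode() != llvm::Instruction::Or)
    return false;
  const auto *BI = llvm::dyn_cast_or_null<llvm::BranchInst>(I->getNextNode());
  return BI && BI->isUnconditional();
}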