author    Dimitry Andric <dim@FreeBSD.org>    2024-01-24 19:11:41 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2024-01-24 19:11:41 +0000
commit    4df029cc74e5ec124f14a5682e44999ce4f086df (patch)
tree      fa2e8720472930df97920b4185215c910159f10d /llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
parent    950076cd18f3fa9d789b4add9d405898efff09a5 (diff)
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp')
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 69
1 file changed, 59 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 13b5e578391d..d611338fc268 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -55,6 +55,9 @@ static cl::opt<unsigned> InlineCallPenaltyChangeSM(
"inline-call-penalty-sm-change", cl::init(10), cl::Hidden,
cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"));
+static cl::opt<bool> EnableOrLikeSelectOpt("enable-aarch64-or-like-select",
+ cl::init(true), cl::Hidden);
+
namespace {
class TailFoldingOption {
// These bitfields will only ever be set to something non-zero in operator=,
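Note: the EnableOrLikeSelectOpt option added above is a standard hidden LLVM cl::opt. It defaults to true and, being cl::Hidden, is listed only by -help-hidden; in a typical build a tool that links the AArch64 backend (llc, for instance) should accept -enable-aarch64-or-like-select=false. For readers unfamiliar with the pattern, a minimal self-contained sketch follows; the flag name and the main() driver are illustrative only, not part of the patch.

// Minimal sketch of the cl::opt pattern used by the patch; the flag name,
// description and driver below are illustrative only.
#include "llvm/Support/CommandLine.h"

using namespace llvm;

static cl::opt<bool> ExampleHiddenFlag(
    "example-hidden-flag",              // spelled -example-hidden-flag on the command line
    cl::init(true),                     // default value when the flag is not given
    cl::Hidden,                         // omitted from -help, listed by -help-hidden
    cl::desc("Illustrative hidden boolean option"));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  return ExampleHiddenFlag ? 0 : 1;
}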
@@ -236,8 +239,9 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
return false;
if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
- CallerAttrs.requiresSMChange(CalleeAttrs,
- /*BodyOverridesInterface=*/true)) {
+ (CallerAttrs.requiresSMChange(CalleeAttrs) &&
+ (!CallerAttrs.hasStreamingInterfaceOrBody() ||
+ !CalleeAttrs.hasStreamingBody()))) {
if (hasPossibleIncompatibleOps(Callee))
return false;
}
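Note: this hunk tightens the SME inlining guard. The hasPossibleIncompatibleOps() scan of the callee is now triggered only when a lazy ZA save is required, or when the call would change PSTATE.SM and it is not the case that a streaming caller is inlining a callee with a streaming body. A hedged restatement of the predicate as a standalone truth function; the struct and helper are stand-ins, not LLVM API, and plain bools replace the SMEAttrs queries used in the diff.

// Illustrative restatement of the guard above (not LLVM API).
struct InlineSMInfo {
  bool RequiresLazySave;           // callee needs a lazy ZA save around the call
  bool RequiresSMChange;           // inlining would otherwise require a PSTATE.SM switch
  bool CallerStreamingIfaceOrBody; // caller has a streaming interface or body
  bool CalleeStreamingBody;        // callee body is streaming
};

static bool mustScanCalleeForIncompatibleOps(const InlineSMInfo &I) {
  return I.RequiresLazySave ||
         (I.RequiresSMChange &&
          (!I.CallerStreamingIfaceOrBody || !I.CalleeStreamingBody));
}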
@@ -3176,14 +3180,47 @@ InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
if (Ty->isPtrOrPtrVectorTy())
return LT.first;
- // Check truncating stores and extending loads.
- if (useNeonVector(Ty) &&
- Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
- // v4i8 types are lowered to a scalar load/store and sshll/xtn.
- if (VT == MVT::v4i8)
- return 2;
- // Otherwise we need to scalarize.
- return cast<FixedVectorType>(Ty)->getNumElements() * 2;
+ if (useNeonVector(Ty)) {
+ // Check truncating stores and extending loads.
+ if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
+ // v4i8 types are lowered to a scalar load/store and sshll/xtn.
+ if (VT == MVT::v4i8)
+ return 2;
+ // Otherwise we need to scalarize.
+ return cast<FixedVectorType>(Ty)->getNumElements() * 2;
+ }
+ EVT EltVT = VT.getVectorElementType();
+ unsigned EltSize = EltVT.getScalarSizeInBits();
+ if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
+ VT.getVectorNumElements() >= (128 / EltSize) || !Alignment ||
+ *Alignment != Align(1))
+ return LT.first;
+ // FIXME: v3i8 lowering currently is very inefficient, due to automatic
+ // widening to v4i8, which produces suboptimal results.
+ if (VT.getVectorNumElements() == 3 && EltVT == MVT::i8)
+ return LT.first;
+
+ // Check non-power-of-2 loads/stores for legal vector element types with
+ // NEON. Non-power-of-2 memory ops will get broken down to a set of
+ // operations on smaller power-of-2 ops, including ld1/st1.
+ LLVMContext &C = Ty->getContext();
+ InstructionCost Cost(0);
+ SmallVector<EVT> TypeWorklist;
+ TypeWorklist.push_back(VT);
+ while (!TypeWorklist.empty()) {
+ EVT CurrVT = TypeWorklist.pop_back_val();
+ unsigned CurrNumElements = CurrVT.getVectorNumElements();
+ if (isPowerOf2_32(CurrNumElements)) {
+ Cost += 1;
+ continue;
+ }
+
+ unsigned PrevPow2 = NextPowerOf2(CurrNumElements) / 2;
+ TypeWorklist.push_back(EVT::getVectorVT(C, EltVT, PrevPow2));
+ TypeWorklist.push_back(
+ EVT::getVectorVT(C, EltVT, CurrNumElements - PrevPow2));
+ }
+ return Cost;
}
return LT.first;
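Note: this hunk keeps the existing truncating-store/extending-load costs and, in addition, prices align-1 non-power-of-2 NEON loads and stores (with a legal element size and fewer elements than a full 128-bit register would hold) by splitting the element count into power-of-2 pieces, each assumed to lower to one ld1/st1-style operation. A standalone sketch of that decomposition with a couple of worked examples; plain unsigned arithmetic replaces the EVT worklist of the real code.

// Illustrative model of the power-of-two decomposition used above.
#include <cassert>
#include <vector>

static unsigned nonPow2MemOpCost(unsigned NumElements) {
  unsigned Cost = 0;
  std::vector<unsigned> Worklist{NumElements};
  while (!Worklist.empty()) {
    unsigned N = Worklist.back();
    Worklist.pop_back();
    if ((N & (N - 1)) == 0) { // power of two: one ld1/st1-style operation
      ++Cost;
      continue;
    }
    // Split off the largest power of two below N and keep decomposing the rest.
    unsigned Pow2 = 1;
    while (Pow2 * 2 < N)
      Pow2 *= 2;
    Worklist.push_back(Pow2);
    Worklist.push_back(N - Pow2);
  }
  return Cost;
}

int main() {
  assert(nonPow2MemOpCost(7) == 3); // 7 -> 4 + 3 -> 4 + (2 + 1)
  assert(nonPow2MemOpCost(6) == 2); // 6 -> 4 + 2
  return 0;
}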
@@ -4014,3 +4051,15 @@ AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
return AM.Scale != 0 && AM.Scale != 1;
return -1;
}
+
+bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) {
+ // For the binary operators (e.g. or) we need to be more careful than
+ // selects; here we only transform them if they are already at a natural
+ // break point in the code - the end of a block with an unconditional
+ // terminator.
+ if (EnableOrLikeSelectOpt && I->getOpcode() == Instruction::Or &&
+ isa<BranchInst>(I->getNextNode()) &&
+ cast<BranchInst>(I->getNextNode())->isUnconditional())
+ return true;
+ return BaseT::shouldTreatInstructionLikeSelect(I);
+}
\ No newline at end of file
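Note: the final hunk adds the shouldTreatInstructionLikeSelect override. An 'or' is treated like a select only while the enable-aarch64-or-like-select flag is on and only when it sits at a natural break point, i.e. immediately before an unconditional branch terminator. A small, hedged helper that mirrors the structural check; it is a standalone sketch with an explicit null guard, not the TTI hook itself.

// Illustrative check mirroring the pattern the hook looks for: an 'or' whose
// next instruction is an unconditional branch terminator.
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

static bool isOrAtBlockEnd(const llvm::Instruction *I) {
  if (I->getOpcode() != llvm::Instruction::Or)
    return false;
  const auto *BI = llvm::dyn_cast_or_null<llvm::BranchInst>(I->getNextNode());
  return BI && BI->isUnconditional();
}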