diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2024-01-24 19:11:41 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2024-01-24 19:11:41 +0000 |
| commit | 4df029cc74e5ec124f14a5682e44999ce4f086df (patch) | |
| tree | fa2e8720472930df97920b4185215c910159f10d /llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | |
| parent | 950076cd18f3fa9d789b4add9d405898efff09a5 (diff) | |
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 69 |
1 file changed, 59 insertions, 10 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 13b5e578391d..d611338fc268 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -55,6 +55,9 @@ static cl::opt<unsigned> InlineCallPenaltyChangeSM( "inline-call-penalty-sm-change", cl::init(10), cl::Hidden, cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM")); +static cl::opt<bool> EnableOrLikeSelectOpt("enable-aarch64-or-like-select", + cl::init(true), cl::Hidden); + namespace { class TailFoldingOption { // These bitfields will only ever be set to something non-zero in operator=, @@ -236,8 +239,9 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, return false; if (CallerAttrs.requiresLazySave(CalleeAttrs) || - CallerAttrs.requiresSMChange(CalleeAttrs, - /*BodyOverridesInterface=*/true)) { + (CallerAttrs.requiresSMChange(CalleeAttrs) && + (!CallerAttrs.hasStreamingInterfaceOrBody() || + !CalleeAttrs.hasStreamingBody()))) { if (hasPossibleIncompatibleOps(Callee)) return false; } @@ -3176,14 +3180,47 @@ InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, if (Ty->isPtrOrPtrVectorTy()) return LT.first; - // Check truncating stores and extending loads. - if (useNeonVector(Ty) && - Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) { - // v4i8 types are lowered to scalar a load/store and sshll/xtn. - if (VT == MVT::v4i8) - return 2; - // Otherwise we need to scalarize. - return cast<FixedVectorType>(Ty)->getNumElements() * 2; + if (useNeonVector(Ty)) { + // Check truncating stores and extending loads. + if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) { + // v4i8 types are lowered to scalar a load/store and sshll/xtn. + if (VT == MVT::v4i8) + return 2; + // Otherwise we need to scalarize. 
+ return cast<FixedVectorType>(Ty)->getNumElements() * 2; + } + EVT EltVT = VT.getVectorElementType(); + unsigned EltSize = EltVT.getScalarSizeInBits(); + if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 || + VT.getVectorNumElements() >= (128 / EltSize) || !Alignment || + *Alignment != Align(1)) + return LT.first; + // FIXME: v3i8 lowering currently is very inefficient, due to automatic + // widening to v4i8, which produces suboptimal results. + if (VT.getVectorNumElements() == 3 && EltVT == MVT::i8) + return LT.first; + + // Check non-power-of-2 loads/stores for legal vector element types with + // NEON. Non-power-of-2 memory ops will get broken down to a set of + // operations on smaller power-of-2 ops, including ld1/st1. + LLVMContext &C = Ty->getContext(); + InstructionCost Cost(0); + SmallVector<EVT> TypeWorklist; + TypeWorklist.push_back(VT); + while (!TypeWorklist.empty()) { + EVT CurrVT = TypeWorklist.pop_back_val(); + unsigned CurrNumElements = CurrVT.getVectorNumElements(); + if (isPowerOf2_32(CurrNumElements)) { + Cost += 1; + continue; + } + + unsigned PrevPow2 = NextPowerOf2(CurrNumElements) / 2; + TypeWorklist.push_back(EVT::getVectorVT(C, EltVT, PrevPow2)); + TypeWorklist.push_back( + EVT::getVectorVT(C, EltVT, CurrNumElements - PrevPow2)); + } + return Cost; } return LT.first; @@ -4014,3 +4051,15 @@ AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, return AM.Scale != 0 && AM.Scale != 1; return -1; } + +bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) { + // For the binary operators (e.g. or) we need to be more careful than + // selects, here we only transform them if they are already at a natural + // break point in the code - the end of a block with an unconditional + // terminator. 
+ if (EnableOrLikeSelectOpt && I->getOpcode() == Instruction::Or && + isa<BranchInst>(I->getNextNode()) && + cast<BranchInst>(I->getNextNode())->isUnconditional()) + return true; + return BaseT::shouldTreatInstructionLikeSelect(I); +}
\ No newline at end of file |
