diff options
Diffstat (limited to 'lib/Target/AArch64/AArch64TargetTransformInfo.cpp')
-rw-r--r-- | lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 56 |
1 files changed, 44 insertions, 12 deletions
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index b8833e5a5552d..4d59da0c646d2 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -176,7 +176,8 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) { return TTI::PSK_Software; } -int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { +int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -436,7 +437,7 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, } int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) { + Type *CondTy, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // We don't lower some vector selects well that are wider than the register @@ -463,11 +464,12 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return Entry->Cost; } } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, - unsigned Alignment, unsigned AddressSpace) { + unsigned Alignment, unsigned AddressSpace, + const Instruction *I) { auto LT = TLI->getTypeLegalizationCost(DL, Ty); if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store && @@ -505,12 +507,14 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, if (Factor <= TLI->getMaxSupportedInterleaveFactor()) { unsigned NumElts = VecTy->getVectorNumElements(); - Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); + auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); // ldN/stN only support legal vector types of size 64 or 128 in bits. - if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) - return Factor; + // Accesses having vector types that are a multiple of 128 bits can be + // matched to more than one ldN/stN instruction. + if (NumElts % Factor == 0 && + TLI->isLegalInterleavedAccessType(SubVecTy, DL)) + return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL); } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, @@ -594,8 +598,6 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::aarch64_neon_ld4: Info.ReadMem = true; Info.WriteMem = false; - Info.IsSimple = true; - Info.NumMemRefs = 1; Info.PtrVal = Inst->getArgOperand(0); break; case Intrinsic::aarch64_neon_st2: @@ -603,8 +605,6 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::aarch64_neon_st4: Info.ReadMem = false; Info.WriteMem = true; - Info.IsSimple = true; - Info.NumMemRefs = 1; Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1); break; } @@ -628,6 +628,38 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, return true; } +/// See if \p I should be considered for address type promotion. We check if \p +/// I is a sext with right type and used in memory accesses. If it used in a +/// "complex" getelementptr, we allow it to be promoted without finding other +/// sext instructions that sign extended the same initial value. A getelementptr +/// is considered as "complex" if it has more than 2 operands. +bool AArch64TTIImpl::shouldConsiderAddressTypePromotion( + const Instruction &I, bool &AllowPromotionWithoutCommonHeader) { + bool Considerable = false; + AllowPromotionWithoutCommonHeader = false; + if (!isa<SExtInst>(&I)) + return false; + Type *ConsideredSExtType = + Type::getInt64Ty(I.getParent()->getParent()->getContext()); + if (I.getType() != ConsideredSExtType) + return false; + // See if the sext is the one with the right type and used in at least one + // GetElementPtrInst. + for (const User *U : I.users()) { + if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) { + Considerable = true; + // A getelementptr is considered as "complex" if it has more than 2 + // operands. We will promote a SExt used in such complex GEP as we + // expect some computation to be merged if they are done on 64 bits. + if (GEPInst->getNumOperands() > 2) { + AllowPromotionWithoutCommonHeader = true; + break; + } + } + } + return Considerable; +} + unsigned AArch64TTIImpl::getCacheLineSize() { return ST->getCacheLineSize(); } |