about summary refs log tree commit diff
path: root/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64TargetMachine.cpp')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp  115
1 file changed, 61 insertions, 54 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 559879139758..036719be06d8 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -21,6 +21,7 @@
#include "TargetInfo/AArch64TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CFIFixup.h"
#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
@@ -196,6 +197,11 @@ static cl::opt<bool> EnableGISelLoadStoreOptPostLegal(
cl::desc("Enable GlobalISel's post-legalizer load/store optimization pass"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ EnableSinkFold("aarch64-enable-sink-fold",
+ cl::desc("Enable sinking and folding of instruction copies"),
+ cl::init(true), cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -220,6 +226,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64SIMDInstrOptPass(*PR);
initializeAArch64O0PreLegalizerCombinerPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);
+ initializeAArch64PointerAuthPass(*PR);
initializeAArch64PostLegalizerCombinerPass(*PR);
initializeAArch64PostLegalizerLoweringPass(*PR);
initializeAArch64PostSelectOptimizePass(*PR);
@@ -319,7 +326,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT,
+ CodeGenOptLevel OL, bool JIT,
bool LittleEndian)
: LLVMTargetMachine(T,
computeDataLayout(TT, Options.MCOptions, LittleEndian),
@@ -357,7 +364,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
// Enable GlobalISel at or below EnableGlobalISelAt0, unless this is
// MachO/CodeModel::Large, which GlobalISel does not support.
- if (getOptLevel() <= EnableGlobalISelAtO &&
+ if (static_cast<int>(getOptLevel()) <= EnableGlobalISelAtO &&
TT.getArch() != Triple::aarch64_32 &&
TT.getEnvironment() != Triple::GNUILP32 &&
!(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) {
@@ -390,6 +397,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
StringRef CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString() : TargetCPU;
StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() : CPU;
StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;
+ bool HasMinSize = F.hasMinSize();
bool StreamingSVEMode = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
F.hasFnAttribute("aarch64_pstate_sm_body");
@@ -398,11 +406,10 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
unsigned MinSVEVectorSize = 0;
unsigned MaxSVEVectorSize = 0;
- Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange);
- if (VScaleRangeAttr.isValid()) {
- std::optional<unsigned> VScaleMax = VScaleRangeAttr.getVScaleRangeMax();
- MinSVEVectorSize = VScaleRangeAttr.getVScaleRangeMin() * 128;
- MaxSVEVectorSize = VScaleMax ? *VScaleMax * 128 : 0;
+ if (F.hasFnAttribute(Attribute::VScaleRange)) {
+ ConstantRange CR = getVScaleRange(&F, 64);
+ MinSVEVectorSize = CR.getUnsignedMin().getZExtValue() * 128;
+ MaxSVEVectorSize = CR.getUnsignedMax().getZExtValue() * 128;
} else {
MinSVEVectorSize = SVEVectorBitsMinOpt;
MaxSVEVectorSize = SVEVectorBitsMaxOpt;
@@ -416,13 +423,9 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
"Minimum SVE vector size should not be larger than its maximum!");
// Sanitize user input in case of no asserts
- if (MaxSVEVectorSize == 0)
- MinSVEVectorSize = (MinSVEVectorSize / 128) * 128;
- else {
- MinSVEVectorSize =
- (std::min(MinSVEVectorSize, MaxSVEVectorSize) / 128) * 128;
- MaxSVEVectorSize =
- (std::max(MinSVEVectorSize, MaxSVEVectorSize) / 128) * 128;
+ if (MaxSVEVectorSize != 0) {
+ MinSVEVectorSize = std::min(MinSVEVectorSize, MaxSVEVectorSize);
+ MaxSVEVectorSize = std::max(MinSVEVectorSize, MaxSVEVectorSize);
}
SmallString<512> Key;
@@ -430,8 +433,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
<< MaxSVEVectorSize
<< "StreamingSVEMode=" << StreamingSVEMode
<< "StreamingCompatibleSVEMode="
- << StreamingCompatibleSVEMode << CPU << TuneCPU
- << FS;
+ << StreamingCompatibleSVEMode << CPU << TuneCPU << FS
+ << "HasMinSize=" << HasMinSize;
auto &I = SubtargetMap[Key];
if (!I) {
@@ -441,13 +444,12 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
resetTargetOptions(F);
I = std::make_unique<AArch64Subtarget>(
TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
- MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode);
+ MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode,
+ HasMinSize);
}
assert((!StreamingSVEMode || I->hasSME()) &&
"Expected SME to be available");
- assert((!StreamingCompatibleSVEMode || I->hasSVEorSME()) &&
- "Expected SVE or SME to be available");
return I.get();
}
@@ -457,7 +459,7 @@ void AArch64leTargetMachine::anchor() { }
AArch64leTargetMachine::AArch64leTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL, bool JIT)
: AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}
void AArch64beTargetMachine::anchor() { }
@@ -465,7 +467,7 @@ void AArch64beTargetMachine::anchor() { }
AArch64beTargetMachine::AArch64beTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, std::optional<Reloc::Model> RM,
- std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
+ std::optional<CodeModel::Model> CM, CodeGenOptLevel OL, bool JIT)
: AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}
namespace {
@@ -475,8 +477,9 @@ class AArch64PassConfig : public TargetPassConfig {
public:
AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {
- if (TM.getOptLevel() != CodeGenOpt::None)
+ if (TM.getOptLevel() != CodeGenOptLevel::None)
substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
+ setEnableSinkAndFold(EnableSinkFold);
}
AArch64TargetMachine &getAArch64TargetMachine() const {
@@ -553,13 +556,14 @@ void AArch64PassConfig::addIRPasses() {
addPass(createAtomicExpandPass());
// Expand any SVE vector library calls that we can't code generate directly.
- if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive)
+ if (EnableSVEIntrinsicOpts &&
+ TM->getOptLevel() == CodeGenOptLevel::Aggressive)
addPass(createSVEIntrinsicOptsPass());
// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in
// ldrex/strex loops to simplify this, but it needs tidying up.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnableAtomicTidy)
addPass(createCFGSimplificationPass(SimplifyCFGOptions()
.forwardSwitchCondToPhi(true)
.convertSwitchRangeToICmp(true)
@@ -572,14 +576,14 @@ void AArch64PassConfig::addIRPasses() {
//
// Run this before LSR to remove the multiplies involved in computing the
// pointer values N iterations ahead.
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
if (EnableLoopDataPrefetch)
addPass(createLoopDataPrefetchPass());
if (EnableFalkorHWPFFix)
addPass(createFalkorMarkStridedAccessesPass());
}
- if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
+ if (TM->getOptLevel() == CodeGenOptLevel::Aggressive && EnableGEPOpt) {
// Call SeparateConstOffsetFromGEP pass to extract constants within indices
// and lower a GEP with multiple indices to either arithmetic operations or
// multiple GEPs with single index.
@@ -594,19 +598,19 @@ void AArch64PassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
- if (getOptLevel() == CodeGenOpt::Aggressive && EnableSelectOpt)
+ if (getOptLevel() == CodeGenOptLevel::Aggressive && EnableSelectOpt)
addPass(createSelectOptimizePass());
addPass(createAArch64GlobalsTaggingPass());
addPass(createAArch64StackTaggingPass(
- /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
+ /*IsOptNone=*/TM->getOptLevel() == CodeGenOptLevel::None));
// Match complex arithmetic patterns
- if (TM->getOptLevel() >= CodeGenOpt::Default)
+ if (TM->getOptLevel() >= CodeGenOptLevel::Default)
addPass(createComplexDeinterleavingPass(TM));
// Match interleaved memory accesses to ldN/stN intrinsics.
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
addPass(createInterleavedLoadCombinePass());
addPass(createInterleavedAccessPass());
}
@@ -628,16 +632,17 @@ void AArch64PassConfig::addIRPasses() {
bool AArch64PassConfig::addPreISel() {
// Run promote constant before global merge, so that the promoted constants
// get a chance to be merged
- if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnablePromoteConstant)
addPass(createAArch64PromoteConstantPass());
// FIXME: On AArch64, this depends on the type.
// Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
// and the offset has to be a multiple of the related size in bytes.
- if ((TM->getOptLevel() != CodeGenOpt::None &&
+ if ((TM->getOptLevel() != CodeGenOptLevel::None &&
EnableGlobalMerge == cl::BOU_UNSET) ||
EnableGlobalMerge == cl::BOU_TRUE) {
- bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
- (EnableGlobalMerge == cl::BOU_UNSET);
+ bool OnlyOptimizeForSize =
+ (TM->getOptLevel() < CodeGenOptLevel::Aggressive) &&
+ (EnableGlobalMerge == cl::BOU_UNSET);
// Merging of extern globals is enabled by default on non-Mach-O as we
// expect it to be generally either beneficial or harmless. On Mach-O it
@@ -658,7 +663,7 @@ bool AArch64PassConfig::addPreISel() {
}
void AArch64PassConfig::addCodeGenPrepare() {
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createTypePromotionLegacyPass());
TargetPassConfig::addCodeGenPrepare();
}
@@ -669,7 +674,7 @@ bool AArch64PassConfig::addInstSelector() {
// For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
// references to _TLS_MODULE_BASE_ as possible.
if (TM->getTargetTriple().isOSBinFormatELF() &&
- getOptLevel() != CodeGenOpt::None)
+ getOptLevel() != CodeGenOptLevel::None)
addPass(createAArch64CleanupLocalDynamicTLSPass());
return false;
@@ -681,7 +686,7 @@ bool AArch64PassConfig::addIRTranslator() {
}
void AArch64PassConfig::addPreLegalizeMachineIR() {
- if (getOptLevel() == CodeGenOpt::None) {
+ if (getOptLevel() == CodeGenOptLevel::None) {
addPass(createAArch64O0PreLegalizerCombiner());
addPass(new Localizer());
} else {
@@ -698,7 +703,7 @@ bool AArch64PassConfig::addLegalizeMachineIR() {
}
void AArch64PassConfig::addPreRegBankSelect() {
- bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+ bool IsOptNone = getOptLevel() == CodeGenOptLevel::None;
if (!IsOptNone) {
addPass(createAArch64PostLegalizerCombiner(IsOptNone));
if (EnableGISelLoadStoreOptPostLegal)
@@ -714,7 +719,7 @@ bool AArch64PassConfig::addRegBankSelect() {
bool AArch64PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect(getOptLevel()));
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOptLevel::None)
addPass(createAArch64PostSelectOptimize());
return false;
}
@@ -723,7 +728,7 @@ void AArch64PassConfig::addMachineSSAOptimization() {
// Run default MachineSSAOptimization first.
TargetPassConfig::addMachineSSAOptimization();
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOptLevel::None)
addPass(createAArch64MIPeepholeOptPass());
}
@@ -741,18 +746,19 @@ bool AArch64PassConfig::addILPOpts() {
if (EnableStPairSuppress)
addPass(createAArch64StorePairSuppressPass());
addPass(createAArch64SIMDInstrOptPass());
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOptLevel::None)
addPass(createAArch64StackTaggingPreRAPass());
return true;
}
void AArch64PassConfig::addPreRegAlloc() {
// Change dead register definitions to refer to the zero register.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
+ if (TM->getOptLevel() != CodeGenOptLevel::None &&
+ EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
// Use AdvSIMD scalar instructions whenever profitable.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnableAdvSIMDScalar) {
addPass(createAArch64AdvSIMDScalar());
// The AdvSIMD pass may produce copies that can be rewritten to
// be register coalescer friendly.
@@ -762,10 +768,11 @@ void AArch64PassConfig::addPreRegAlloc() {
void AArch64PassConfig::addPostRegAlloc() {
// Remove redundant copy instructions.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
+ if (TM->getOptLevel() != CodeGenOptLevel::None &&
+ EnableRedundantCopyElimination)
addPass(createAArch64RedundantCopyEliminationPass());
- if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
+ if (TM->getOptLevel() != CodeGenOptLevel::None && usingDefaultRegAlloc())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());
}
@@ -777,7 +784,7 @@ void AArch64PassConfig::addPreSched2() {
// Expand some pseudo instructions to allow proper scheduling.
addPass(createAArch64ExpandPseudoPass());
// Use load/store pair instructions when possible.
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
if (EnableLoadStoreOpt)
addPass(createAArch64LoadStoreOptimizationPass());
}
@@ -794,7 +801,7 @@ void AArch64PassConfig::addPreSched2() {
addPass(createAArch64IndirectThunks());
addPass(createAArch64SLSHardeningPass());
- if (TM->getOptLevel() != CodeGenOpt::None) {
+ if (TM->getOptLevel() != CodeGenOptLevel::None) {
if (EnableFalkorHWPFFix)
addPass(createFalkorHWPFFixPass());
}
@@ -804,18 +811,15 @@ void AArch64PassConfig::addPreEmitPass() {
// Machine Block Placement might have created new opportunities when run
// at O3, where the Tail Duplication Threshold is set to 4 instructions.
// Run the load/store optimizer once more.
- if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt)
+ if (TM->getOptLevel() >= CodeGenOptLevel::Aggressive && EnableLoadStoreOpt)
addPass(createAArch64LoadStoreOptimizationPass());
- if (TM->getOptLevel() >= CodeGenOpt::Aggressive &&
+ if (TM->getOptLevel() >= CodeGenOptLevel::Aggressive &&
EnableAArch64CopyPropagation)
addPass(createMachineCopyPropagationPass(true));
addPass(createAArch64A53Fix835769());
- if (EnableBranchTargets)
- addPass(createAArch64BranchTargetsPass());
-
if (TM->getTargetTriple().isOSWindows()) {
// Identify valid longjmp targets for Windows Control Flow Guard.
addPass(createCFGuardLongjmpPass());
@@ -823,18 +827,21 @@ void AArch64PassConfig::addPreEmitPass() {
addPass(createEHContGuardCatchretPass());
}
- if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnableCollectLOH &&
TM->getTargetTriple().isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
}
void AArch64PassConfig::addPostBBSections() {
+ addPass(createAArch64PointerAuthPass());
+ if (EnableBranchTargets)
+ addPass(createAArch64BranchTargetsPass());
// Relax conditional branch instructions if they're otherwise out of
// range of their destination.
if (BranchRelaxation)
addPass(&BranchRelaxationPassID);
- if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
+ if (TM->getOptLevel() != CodeGenOptLevel::None && EnableCompressJumpTables)
addPass(createAArch64CompressJumpTablesPass());
}