diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 289 |
1 files changed, 289 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp new file mode 100644 index 000000000000..3bc89b91c3f7 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -0,0 +1,289 @@ +//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the AArch64 specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "AArch64Subtarget.h" + +#include "AArch64.h" +#include "AArch64CallLowering.h" +#include "AArch64InstrInfo.h" +#include "AArch64LegalizerInfo.h" +#include "AArch64PBQPRegAlloc.h" +#include "AArch64RegisterBankInfo.h" +#include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Support/TargetParser.h" + +using namespace llvm; + +#define DEBUG_TYPE "aarch64-subtarget" + +#define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC +#include "AArch64GenSubtargetInfo.inc" + +static cl::opt<bool> +EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " + "converter pass"), cl::init(true), cl::Hidden); + +// If OS supports TBI, use this flag to enable it. +static cl::opt<bool> +UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " + "an address is ignored"), cl::init(false), cl::Hidden); + +static cl::opt<bool> + UseNonLazyBind("aarch64-enable-nonlazybind", + cl::desc("Call nonlazybind functions via direct GOT load"), + cl::init(false), cl::Hidden); + +AArch64Subtarget & +AArch64Subtarget::initializeSubtargetDependencies(StringRef FS, + StringRef CPUString) { + // Determine default and user-specified characteristics + + if (CPUString.empty()) + CPUString = "generic"; + + ParseSubtargetFeatures(CPUString, FS); + initializeProperties(); + + return *this; +} + +void AArch64Subtarget::initializeProperties() { + // Initialize CPU specific properties. We should add a tablegen feature for + // this in the future so we can specify it together with the subtarget + // features. + switch (ARMProcFamily) { + case Others: + break; + case CortexA35: + break; + case CortexA53: + PrefFunctionAlignment = 3; + break; + case CortexA55: + break; + case CortexA57: + MaxInterleaveFactor = 4; + PrefFunctionAlignment = 4; + break; + case CortexA72: + case CortexA73: + case CortexA75: + case CortexA76: + PrefFunctionAlignment = 4; + break; + case Cyclone: + CacheLineSize = 64; + PrefetchDistance = 280; + MinPrefetchStride = 2048; + MaxPrefetchIterationsAhead = 3; + break; + case ExynosM1: + MaxInterleaveFactor = 4; + MaxJumpTableSize = 8; + PrefFunctionAlignment = 4; + PrefLoopAlignment = 3; + break; + case ExynosM3: + MaxInterleaveFactor = 4; + MaxJumpTableSize = 20; + PrefFunctionAlignment = 5; + PrefLoopAlignment = 4; + break; + case Falkor: + MaxInterleaveFactor = 4; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; + CacheLineSize = 128; + PrefetchDistance = 820; + MinPrefetchStride = 2048; + MaxPrefetchIterationsAhead = 8; + break; + case Kryo: + MaxInterleaveFactor = 4; + VectorInsertExtractBaseCost = 2; + CacheLineSize = 128; + PrefetchDistance = 740; + MinPrefetchStride = 1024; + MaxPrefetchIterationsAhead = 11; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; + break; + case Saphira: + MaxInterleaveFactor = 4; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; + break; + case ThunderX2T99: + CacheLineSize = 64; + PrefFunctionAlignment = 3; + PrefLoopAlignment = 2; + MaxInterleaveFactor = 4; + PrefetchDistance = 128; + MinPrefetchStride = 1024; + MaxPrefetchIterationsAhead = 4; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; + break; + case ThunderX: + case ThunderXT88: + case ThunderXT81: + case ThunderXT83: + CacheLineSize = 128; + PrefFunctionAlignment = 3; + PrefLoopAlignment = 2; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; + break; + case TSV110: + CacheLineSize = 64; + PrefFunctionAlignment = 4; + PrefLoopAlignment = 2; + break; + } +} + +AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, + const TargetMachine &TM, bool LittleEndian) + : AArch64GenSubtargetInfo(TT, CPU, FS), + ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()), + CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()), + IsLittle(LittleEndian), + TargetTriple(TT), FrameLowering(), + InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(), + TLInfo(TM, *this) { + if (AArch64::isX18ReservedByDefault(TT)) + ReserveXRegister.set(18); + + CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering())); + Legalizer.reset(new AArch64LegalizerInfo(*this)); + + auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo()); + + // FIXME: At this point, we can't rely on Subtarget having RBI. + // It's awkward to mix passing RBI and the Subtarget; should we pass + // TII/TRI as well? + InstSelector.reset(createAArch64InstructionSelector( + *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI)); + + RegBankInfo.reset(RBI); +} + +const CallLowering *AArch64Subtarget::getCallLowering() const { + return CallLoweringInfo.get(); +} + +const InstructionSelector *AArch64Subtarget::getInstructionSelector() const { + return InstSelector.get(); +} + +const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const { + return Legalizer.get(); +} + +const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const { + return RegBankInfo.get(); +} + +/// Find the target operand flags that describe how a global value should be +/// referenced for the current subtarget. +unsigned char +AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, + const TargetMachine &TM) const { + // MachO large model always goes via a GOT, simply to get a single 8-byte + // absolute relocation on all global addresses. + if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) + return AArch64II::MO_GOT; + + if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) { + if (GV->hasDLLImportStorageClass()) + return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT; + if (getTargetTriple().isOSWindows()) + return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB; + return AArch64II::MO_GOT; + } + + // The small code model's direct accesses use ADRP, which cannot + // necessarily produce the value 0 (if the code is above 4GB). + // Same for the tiny code model, where we have a pc relative LDR. + if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) && + GV->hasExternalWeakLinkage()) + return AArch64II::MO_GOT; + + return AArch64II::MO_NO_FLAG; +} + +unsigned char AArch64Subtarget::classifyGlobalFunctionReference( + const GlobalValue *GV, const TargetMachine &TM) const { + // MachO large model always goes via a GOT, because we don't have the + // relocations available to do anything else.. + if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() && + !GV->hasInternalLinkage()) + return AArch64II::MO_GOT; + + // NonLazyBind goes via GOT unless we know it's available locally. + auto *F = dyn_cast<Function>(GV); + if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) && + !TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) + return AArch64II::MO_GOT; + + return AArch64II::MO_NO_FLAG; +} + +void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, + unsigned NumRegionInstrs) const { + // LNT run (at least on Cyclone) showed reasonably significant gains for + // bi-directional scheduling. 253.perlbmk. + Policy.OnlyTopDown = false; + Policy.OnlyBottomUp = false; + // Enabling or Disabling the latency heuristic is a close call: It seems to + // help nearly no benchmark on out-of-order architectures, on the other hand + // it regresses register pressure on a few benchmarking. + Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic; +} + +bool AArch64Subtarget::enableEarlyIfConversion() const { + return EnableEarlyIfConvert; +} + +bool AArch64Subtarget::supportsAddressTopByteIgnored() const { + if (!UseAddressTopByteIgnored) + return false; + + if (TargetTriple.isiOS()) { + unsigned Major, Minor, Micro; + TargetTriple.getiOSVersion(Major, Minor, Micro); + return Major >= 8; + } + + return false; +} + +std::unique_ptr<PBQPRAConstraint> +AArch64Subtarget::getCustomPBQPConstraints() const { + return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr; +} + +void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const { + // We usually compute max call frame size after ISel. Do the computation now + // if the .mir file didn't specify it. Note that this will probably give you + // bogus values after PEI has eliminated the callframe setup/destroy pseudo + // instructions, specify explicitly if you need it to be correct. + MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isMaxCallFrameSizeComputed()) + MFI.computeMaxCallFrameSize(MF); +} |
