summaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp289
1 files changed, 289 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
new file mode 100644
index 000000000000..3bc89b91c3f7
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -0,0 +1,289 @@
+//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64Subtarget.h"
+
+#include "AArch64.h"
+#include "AArch64CallLowering.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64LegalizerInfo.h"
+#include "AArch64PBQPRegAlloc.h"
+#include "AArch64RegisterBankInfo.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/TargetParser.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-subtarget"
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "AArch64GenSubtargetInfo.inc"
+
+static cl::opt<bool>
+EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
+ "converter pass"), cl::init(true), cl::Hidden);
+
+// If OS supports TBI, use this flag to enable it.
+static cl::opt<bool>
+UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
+ "an address is ignored"), cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+ UseNonLazyBind("aarch64-enable-nonlazybind",
+ cl::desc("Call nonlazybind functions via direct GOT load"),
+ cl::init(false), cl::Hidden);
+
+AArch64Subtarget &
+AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
+ StringRef CPUString) {
+ // Determine default and user-specified characteristics
+
+ if (CPUString.empty())
+ CPUString = "generic";
+
+ ParseSubtargetFeatures(CPUString, FS);
+ initializeProperties();
+
+ return *this;
+}
+
+void AArch64Subtarget::initializeProperties() {
+ // Initialize CPU specific properties. We should add a tablegen feature for
+ // this in the future so we can specify it together with the subtarget
+ // features.
+ switch (ARMProcFamily) {
+ case Others:
+ break;
+ case CortexA35:
+ break;
+ case CortexA53:
+ PrefFunctionAlignment = 3;
+ break;
+ case CortexA55:
+ break;
+ case CortexA57:
+ MaxInterleaveFactor = 4;
+ PrefFunctionAlignment = 4;
+ break;
+ case CortexA72:
+ case CortexA73:
+ case CortexA75:
+ case CortexA76:
+ PrefFunctionAlignment = 4;
+ break;
+ case Cyclone:
+ CacheLineSize = 64;
+ PrefetchDistance = 280;
+ MinPrefetchStride = 2048;
+ MaxPrefetchIterationsAhead = 3;
+ break;
+ case ExynosM1:
+ MaxInterleaveFactor = 4;
+ MaxJumpTableSize = 8;
+ PrefFunctionAlignment = 4;
+ PrefLoopAlignment = 3;
+ break;
+ case ExynosM3:
+ MaxInterleaveFactor = 4;
+ MaxJumpTableSize = 20;
+ PrefFunctionAlignment = 5;
+ PrefLoopAlignment = 4;
+ break;
+ case Falkor:
+ MaxInterleaveFactor = 4;
+ // FIXME: remove this to enable 64-bit SLP if performance looks good.
+ MinVectorRegisterBitWidth = 128;
+ CacheLineSize = 128;
+ PrefetchDistance = 820;
+ MinPrefetchStride = 2048;
+ MaxPrefetchIterationsAhead = 8;
+ break;
+ case Kryo:
+ MaxInterleaveFactor = 4;
+ VectorInsertExtractBaseCost = 2;
+ CacheLineSize = 128;
+ PrefetchDistance = 740;
+ MinPrefetchStride = 1024;
+ MaxPrefetchIterationsAhead = 11;
+ // FIXME: remove this to enable 64-bit SLP if performance looks good.
+ MinVectorRegisterBitWidth = 128;
+ break;
+ case Saphira:
+ MaxInterleaveFactor = 4;
+ // FIXME: remove this to enable 64-bit SLP if performance looks good.
+ MinVectorRegisterBitWidth = 128;
+ break;
+ case ThunderX2T99:
+ CacheLineSize = 64;
+ PrefFunctionAlignment = 3;
+ PrefLoopAlignment = 2;
+ MaxInterleaveFactor = 4;
+ PrefetchDistance = 128;
+ MinPrefetchStride = 1024;
+ MaxPrefetchIterationsAhead = 4;
+ // FIXME: remove this to enable 64-bit SLP if performance looks good.
+ MinVectorRegisterBitWidth = 128;
+ break;
+ case ThunderX:
+ case ThunderXT88:
+ case ThunderXT81:
+ case ThunderXT83:
+ CacheLineSize = 128;
+ PrefFunctionAlignment = 3;
+ PrefLoopAlignment = 2;
+ // FIXME: remove this to enable 64-bit SLP if performance looks good.
+ MinVectorRegisterBitWidth = 128;
+ break;
+ case TSV110:
+ CacheLineSize = 64;
+ PrefFunctionAlignment = 4;
+ PrefLoopAlignment = 2;
+ break;
+ }
+}
+
+AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
+ const std::string &FS,
+ const TargetMachine &TM, bool LittleEndian)
+ : AArch64GenSubtargetInfo(TT, CPU, FS),
+ ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
+ CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
+ IsLittle(LittleEndian),
+ TargetTriple(TT), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
+ TLInfo(TM, *this) {
+ if (AArch64::isX18ReservedByDefault(TT))
+ ReserveXRegister.set(18);
+
+ CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
+ Legalizer.reset(new AArch64LegalizerInfo(*this));
+
+ auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
+
+ // FIXME: At this point, we can't rely on Subtarget having RBI.
+ // It's awkward to mix passing RBI and the Subtarget; should we pass
+ // TII/TRI as well?
+ InstSelector.reset(createAArch64InstructionSelector(
+ *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
+
+ RegBankInfo.reset(RBI);
+}
+
+const CallLowering *AArch64Subtarget::getCallLowering() const {
+ return CallLoweringInfo.get();
+}
+
+const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
+ return InstSelector.get();
+}
+
+const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
+ return Legalizer.get();
+}
+
+const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
+ return RegBankInfo.get();
+}
+
+/// Find the target operand flags that describe how a global value should be
+/// referenced for the current subtarget.
+unsigned char
+AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // MachO large model always goes via a GOT, simply to get a single 8-byte
+ // absolute relocation on all global addresses.
+ if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
+ return AArch64II::MO_GOT;
+
+ if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
+ if (GV->hasDLLImportStorageClass())
+ return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
+ if (getTargetTriple().isOSWindows())
+ return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
+ return AArch64II::MO_GOT;
+ }
+
+ // The small code model's direct accesses use ADRP, which cannot
+ // necessarily produce the value 0 (if the code is above 4GB).
+ // Same for the tiny code model, where we have a pc relative LDR.
+ if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
+ GV->hasExternalWeakLinkage())
+ return AArch64II::MO_GOT;
+
+ return AArch64II::MO_NO_FLAG;
+}
+
+unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
+ const GlobalValue *GV, const TargetMachine &TM) const {
+ // MachO large model always goes via a GOT, because we don't have the
+ // relocations available to do anything else..
+ if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
+ !GV->hasInternalLinkage())
+ return AArch64II::MO_GOT;
+
+ // NonLazyBind goes via GOT unless we know it's available locally.
+ auto *F = dyn_cast<Function>(GV);
+ if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
+ !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ return AArch64II::MO_GOT;
+
+ return AArch64II::MO_NO_FLAG;
+}
+
+void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ unsigned NumRegionInstrs) const {
+ // LNT run (at least on Cyclone) showed reasonably significant gains for
+ // bi-directional scheduling. 253.perlbmk.
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+ // Enabling or Disabling the latency heuristic is a close call: It seems to
+ // help nearly no benchmark on out-of-order architectures, on the other hand
+ // it regresses register pressure on a few benchmarking.
+ Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
+}
+
+bool AArch64Subtarget::enableEarlyIfConversion() const {
+ return EnableEarlyIfConvert;
+}
+
+bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
+ if (!UseAddressTopByteIgnored)
+ return false;
+
+ if (TargetTriple.isiOS()) {
+ unsigned Major, Minor, Micro;
+ TargetTriple.getiOSVersion(Major, Minor, Micro);
+ return Major >= 8;
+ }
+
+ return false;
+}
+
+std::unique_ptr<PBQPRAConstraint>
+AArch64Subtarget::getCustomPBQPConstraints() const {
+ return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
+}
+
+void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
+ // We usually compute max call frame size after ISel. Do the computation now
+ // if the .mir file didn't specify it. Note that this will probably give you
+ // bogus values after PEI has eliminated the callframe setup/destroy pseudo
+ // instructions, specify explicitly if you need it to be correct.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.isMaxCallFrameSizeComputed())
+ MFI.computeMaxCallFrameSize(MF);
+}