diff options
Diffstat (limited to 'lib/Target/AArch64/AArch64Subtarget.cpp')
-rw-r--r-- | lib/Target/AArch64/AArch64Subtarget.cpp | 109 |
1 files changed, 64 insertions, 45 deletions
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index f6ee8cf47a6a4..7dd8ccbe6c25e 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -11,10 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "AArch64Subtarget.h" #include "AArch64InstrInfo.h" #include "AArch64PBQPRegAlloc.h" -#include "AArch64Subtarget.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/TargetRegistry.h" @@ -44,58 +43,83 @@ AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) { CPUString = "generic"; ParseSubtargetFeatures(CPUString, FS); + initializeProperties(); + return *this; } +void AArch64Subtarget::initializeProperties() { + // Initialize CPU specific properties. We should add a tablegen feature for + // this in the future so we can specify it together with the subtarget + // features. + switch (ARMProcFamily) { + case Cyclone: + CacheLineSize = 64; + PrefetchDistance = 280; + MinPrefetchStride = 2048; + MaxPrefetchIterationsAhead = 3; + break; + case CortexA57: + MaxInterleaveFactor = 4; + break; + case ExynosM1: + PrefFunctionAlignment = 4; + PrefLoopAlignment = 3; + break; + case Kryo: + MaxInterleaveFactor = 4; + VectorInsertExtractBaseCost = 2; + CacheLineSize = 128; + PrefetchDistance = 740; + MinPrefetchStride = 1024; + MaxPrefetchIterationsAhead = 11; + break; + case Vulcan: + MaxInterleaveFactor = 4; + break; + case CortexA35: break; + case CortexA53: break; + case CortexA72: break; + case CortexA73: break; + case Others: break; + } +} + AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM, bool LittleEndian) - : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), - HasV8_1aOps(false), HasV8_2aOps(false), HasFPARMv8(false), HasNEON(false), - HasCrypto(false), HasCRC(false), HasPerfMon(false), HasFullFP16(false), - HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), - StrictAlign(false), ReserveX18(TT.isOSDarwin()), IsLittle(LittleEndian), - CPUString(CPU), TargetTriple(TT), FrameLowering(), + : AArch64GenSubtargetInfo(TT, CPU, FS), ReserveX18(TT.isOSDarwin()), + IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(), - TLInfo(TM, *this) {} + TLInfo(TM, *this), GISel() {} + +const CallLowering *AArch64Subtarget::getCallLowering() const { + assert(GISel && "Access to GlobalISel APIs not set"); + return GISel->getCallLowering(); +} + +const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const { + assert(GISel && "Access to GlobalISel APIs not set"); + return GISel->getRegBankInfo(); +} -/// ClassifyGlobalReference - Find the target operand flags that describe -/// how a global value should be referenced for the current subtarget. +/// Find the target operand flags that describe how a global value should be +/// referenced for the current subtarget. unsigned char AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, - const TargetMachine &TM) const { - bool isDef = GV->isStrongDefinitionForLinker(); - + const TargetMachine &TM) const { // MachO large model always goes via a GOT, simply to get a single 8-byte // absolute relocation on all global addresses. if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) return AArch64II::MO_GOT; + if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) + return AArch64II::MO_GOT; + // The small code mode's direct accesses use ADRP, which cannot necessarily // produce the value 0 (if the code is above 4GB). - if (TM.getCodeModel() == CodeModel::Small && GV->hasExternalWeakLinkage()) { - // In PIC mode use the GOT, but in absolute mode use a constant pool load. - if (TM.getRelocationModel() == Reloc::Static) - return AArch64II::MO_CONSTPOOL; - else - return AArch64II::MO_GOT; - } - - // If symbol visibility is hidden, the extra load is not needed if - // the symbol is definitely defined in the current translation unit. - - // The handling of non-hidden symbols in PIC mode is rather target-dependent: - // + On MachO, if the symbol is defined in this module the GOT can be - // skipped. - // + On ELF, the R_AARCH64_COPY relocation means that even symbols actually - // defined could end up in unexpected places. Use a GOT. - if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility()) { - if (isTargetMachO()) - return isDef ? AArch64II::MO_NO_FLAG : AArch64II::MO_GOT; - else - // No need to go through the GOT for local symbols on ELF. - return GV->hasLocalLinkage() ? AArch64II::MO_NO_FLAG : AArch64II::MO_GOT; - } + if (TM.getCodeModel() == CodeModel::Small && GV->hasExternalWeakLinkage()) + return AArch64II::MO_GOT; return AArch64II::MO_NO_FLAG; } @@ -114,8 +138,7 @@ const char *AArch64Subtarget::getBZeroEntry() const { } void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, - MachineInstr *begin, MachineInstr *end, - unsigned NumRegionInstrs) const { + unsigned NumRegionInstrs) const { // LNT run (at least on Cyclone) showed reasonably significant gains for // bi-directional scheduling. 253.perlbmk. Policy.OnlyTopDown = false; @@ -123,8 +146,7 @@ void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, // Enabling or Disabling the latency heuristic is a close call: It seems to // help nearly no benchmark on out-of-order architectures, on the other hand // it regresses register pressure on a few benchmarking. - if (isCyclone()) - Policy.DisableLatencyHeuristic = true; + Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic; } bool AArch64Subtarget::enableEarlyIfConversion() const { @@ -146,8 +168,5 @@ bool AArch64Subtarget::supportsAddressTopByteIgnored() const { std::unique_ptr<PBQPRAConstraint> AArch64Subtarget::getCustomPBQPConstraints() const { - if (!isCortexA57()) - return nullptr; - - return llvm::make_unique<A57ChainingConstraint>(); + return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr; } |