Diffstat (limited to 'lib/Target/ARM/ARMSubtarget.cpp')
-rw-r--r-- | lib/Target/ARM/ARMSubtarget.cpp | 73
1 file changed, 62 insertions, 11 deletions
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index b1d0761e3231..978faed776b0 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -1,9 +1,8 @@
 //===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -93,10 +92,12 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
 
 ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
                            const std::string &FS,
-                           const ARMBaseTargetMachine &TM, bool IsLittle)
+                           const ARMBaseTargetMachine &TM, bool IsLittle,
+                           bool MinSize)
     : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
-      CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
-      TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
+      CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
+      TargetTriple(TT), Options(TM.Options), TM(TM),
+      FrameLowering(initializeFrameLowering(CPU, FS)),
       // At this point initializeSubtargetDependencies has been called so
       // we can query directly.
       InstrInfo(isThumb1Only()
@@ -283,6 +284,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   case CortexA72:
   case CortexA73:
   case CortexA75:
+  case CortexA76:
   case CortexR4:
   case CortexR4F:
   case CortexR5:
@@ -359,6 +361,13 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
 }
 
 bool ARMSubtarget::enableMachineScheduler() const {
+  // The MachineScheduler can increase register usage, so we use more high
+  // registers and end up with more T2 instructions that cannot be converted to
+  // T1 instructions. At least until we do better at converting to thumb1
+  // instructions, on cortex-m at Oz where we are size-paranoid, don't use the
+  // Machine scheduler, relying on the DAG register pressure scheduler instead.
+  if (isMClass() && hasMinSize())
+    return false;
   // Enable the MachineScheduler before register allocation for subtargets
   // with the use-misched feature.
   return useMachineScheduler();
@@ -374,20 +383,20 @@ bool ARMSubtarget::enablePostRAScheduler() const {
 
 bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
 
-bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+bool ARMSubtarget::useStride4VFPs() const {
   // For general targets, the prologue can grow when VFPs are allocated with
   // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
   // format which it's more important to get right.
   return isTargetWatchABI() ||
-         (useWideStrideVFP() && !MF.getFunction().optForMinSize());
+         (useWideStrideVFP() && !OptMinSize);
 }
 
-bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+bool ARMSubtarget::useMovt() const {
   // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
   // immediates as it is inherently position independent, and may be out of
   // range otherwise.
   return !NoMovt && hasV8MBaselineOps() &&
-         (isTargetWindows() || !MF.getFunction().optForMinSize() || genExecuteOnly());
+         (isTargetWindows() || !OptMinSize || genExecuteOnly());
 }
 
 bool ARMSubtarget::useFastISel() const {
@@ -404,3 +413,45 @@
          ((isTargetMachO() && !isThumb1Only()) ||
           (isTargetLinux() && !isThumb()) || (isTargetNaCl() && !isThumb()));
 }
+
+unsigned ARMSubtarget::getGPRAllocationOrder(const MachineFunction &MF) const {
+  // The GPR register class has multiple possible allocation orders, with
+  // tradeoffs preferred by different sub-architectures and optimisation goals.
+  // The allocation orders are:
+  // 0: (the default tablegen order, not used)
+  // 1: r14, r0-r13
+  // 2: r0-r7
+  // 3: r0-r7, r12, lr, r8-r11
+  // Note that the register allocator will change this order so that
+  // callee-saved registers are used later, as they require extra work in the
+  // prologue/epilogue (though we sometimes override that).
+
+  // For thumb1-only targets, only the low registers are allocatable.
+  if (isThumb1Only())
+    return 2;
+
+  // Allocate low registers first, so we can select more 16-bit instructions.
+  // We also (in ignoreCSRForAllocationOrder) override the default behaviour
+  // with regards to callee-saved registers, because pushing extra registers is
+  // much cheaper (in terms of code size) than using high registers. After
+  // that, we allocate r12 (doesn't need to be saved), lr (saving it means we
+  // can return with the pop, don't need an extra "bx lr") and then the rest of
+  // the high registers.
+  if (isThumb2() && MF.getFunction().hasMinSize())
+    return 3;
+
+  // Otherwise, allocate in the default order, using LR first because saving it
+  // allows a shorter epilogue sequence.
+  return 1;
+}
+
+bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
+                                               unsigned PhysReg) const {
+  // To minimize code size in Thumb2, we prefer the usage of low regs (lower
+  // cost per use) so we can use narrow encoding. By default, caller-saved
+  // registers (e.g. lr, r12) are always allocated first, regardless of
+  // their cost per use. When optForMinSize, we prefer the low regs even if
+  // they are CSR because usually push/pop can be folded into existing ones.
+  return isThumb2() && MF.getFunction().hasMinSize() &&
+         ARM::GPRRegClass.contains(PhysReg);
+}
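The most substantive addition above is ARMSubtarget::getGPRAllocationOrder, which picks between the alternative GPR allocation orders listed in its comment. The standalone C++ sketch below is illustrative only, not LLVM code: FakeSubtarget, the register-name tables, and main() are hypothetical stand-ins. It models how the order index chosen by that logic maps onto the register sequences named in the comment; in the real backend sp and pc are reserved and never allocated, and the chosen index is consumed by the register allocator through the register class's alternative allocation orders.

// Illustrative sketch only (not LLVM code). FakeSubtarget and the register
// name tables are hypothetical; the selection logic mirrors the new
// ARMSubtarget::getGPRAllocationOrder, and the sequences simply spell out
// the orders listed in its comment.
#include <cstdio>
#include <string>
#include <vector>

struct FakeSubtarget {
  bool Thumb1Only = false;
  bool Thumb2 = true;
  bool MinSize = true; // in LLVM this comes from the function's minsize attribute

  unsigned getGPRAllocationOrder() const {
    if (Thumb1Only)
      return 2;          // Thumb1: only the low registers are allocatable
    if (Thumb2 && MinSize)
      return 3;          // Thumb2 + minsize: low regs, then r12/lr, then high regs
    return 1;            // default: lr first, then r0-r13
  }
};

// The orders named in the commit's comment (sp/pc would be reserved in practice).
static const std::vector<std::string> Orders[] = {
    /*0*/ {},                                                  // unused
    /*1*/ {"lr", "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
           "r8", "r9", "r10", "r11", "r12", "sp"},             // r14, r0-r13
    /*2*/ {"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7"},    // r0-r7
    /*3*/ {"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
           "r12", "lr", "r8", "r9", "r10", "r11"},             // r0-r7, r12, lr, r8-r11
};

int main() {
  FakeSubtarget ST;
  unsigned Idx = ST.getGPRAllocationOrder();
  std::printf("GPR allocation order %u:", Idx);
  for (const std::string &Reg : Orders[Idx])
    std::printf(" %s", Reg.c_str());
  std::printf("\n");
  return 0;
}

With the defaults above this prints order 3, the Thumb2-at-minsize case; flipping MinSize to false falls back to order 1. Order 3 is also why ignoreCSRForAllocationOrder returns true there: preferring callee-saved low registers only pays off for code size if the extra saves can usually be folded into the existing push/pop.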