summaryrefslogtreecommitdiff
path: root/lib/Target/AArch64/AArch64TargetMachine.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AArch64/AArch64TargetMachine.cpp')
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp138
1 files changed, 122 insertions, 16 deletions
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index c52c5544fc7e2..0b6345ff8011b 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -11,13 +11,19 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "AArch64CallLowering.h"
+#include "AArch64RegisterBankInfo.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
@@ -58,6 +64,11 @@ EnableDeadRegisterElimination("aarch64-dead-def-elimination", cl::Hidden,
cl::init(true));
static cl::opt<bool>
+EnableRedundantCopyElimination("aarch64-redundant-copy-elim",
+ cl::desc("Enable the redundant copy elimination pass"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair"
" optimization pass"), cl::init(true), cl::Hidden);
@@ -92,11 +103,19 @@ static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("aarch64-global-merge", cl::Hidden,
cl::desc("Enable the global merge pass"));
+static cl::opt<bool>
+ EnableLoopDataPrefetch("aarch64-loop-data-prefetch", cl::Hidden,
+ cl::desc("Enable the loop data prefetch pass"),
+ cl::init(true));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target machines; TheARM64Target reuses the little-endian one.
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
RegisterTargetMachine<AArch64beTargetMachine> Y(TheAArch64beTarget);
RegisterTargetMachine<AArch64leTargetMachine> Z(TheARM64Target);
+ auto PR = PassRegistry::getPassRegistry(); // registry passed to the calls below
+ initializeGlobalISel(*PR);
+ initializeAArch64ExpandPseudoPass(*PR);
}
//===----------------------------------------------------------------------===//
@@ -114,29 +133,79 @@ static std::string computeDataLayout(const Triple &TT, bool LittleEndian) {
if (TT.isOSBinFormatMachO())
return "e-m:o-i64:64-i128:128-n32:64-S128";
if (LittleEndian)
- return "e-m:e-i64:64-i128:128-n32:64-S128";
- return "E-m:e-i64:64-i128:128-n32:64-S128";
+ return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
+ return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
}
-/// TargetMachine ctor - Create an AArch64 architecture model.
+// Helper that installs TM's default reciprocal-estimate settings (sqrt, div).
+static void initReciprocals(AArch64TargetMachine& TM, AArch64Subtarget& ST)
+{
+ // For the estimates, convergence is quadratic, so the number of correct
+ // digits roughly doubles after each iteration. In ARMv8, the minimum
+ // architected accuracy of the initial estimate is 2^-8. Therefore, the
+ // extra steps needed to refine the result for float (23 mantissa bits) and
+ // for double (52 mantissa bits) are 2 and 3, respectively.
+ unsigned ExtraStepsF = 2,
+ ExtraStepsD = ExtraStepsF + 1;
+ bool UseRsqrt = ST.useRSqrt();
+
+ TM.Options.Reciprocals.setDefaults("sqrtf", UseRsqrt, ExtraStepsF);
+ TM.Options.Reciprocals.setDefaults("sqrtd", UseRsqrt, ExtraStepsD);
+ TM.Options.Reciprocals.setDefaults("vec-sqrtf", UseRsqrt, ExtraStepsF);
+ TM.Options.Reciprocals.setDefaults("vec-sqrtd", UseRsqrt, ExtraStepsD);
+
+ TM.Options.Reciprocals.setDefaults("divf", false, ExtraStepsF);
+ TM.Options.Reciprocals.setDefaults("divd", false, ExtraStepsD);
+ TM.Options.Reciprocals.setDefaults("vec-divf", false, ExtraStepsF);
+ TM.Options.Reciprocals.setDefaults("vec-divd", false, ExtraStepsD);
+}
+
+static Reloc::Model getEffectiveRelocModel(const Triple &TT,
+ Optional<Reloc::Model> RM) {
+ // AArch64 Darwin is always PIC, regardless of the requested model.
+ if (TT.isOSDarwin())
+ return Reloc::PIC_;
+ // On ELF platforms the linker used with the default static relocation model
+ // can cope with references to external symbols defined in a shared library,
+ // so an unset model or DynamicNoPIC maps to Static rather than to PIC.
+ if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
+ return Reloc::Static;
+ return *RM; // any other explicitly requested model is returned unchanged
+}
+
+/// Create an AArch64 architecture model.
///
-AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool LittleEndian)
+AArch64TargetMachine::AArch64TargetMachine(
+ const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Optional<Reloc::Model> RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL, bool LittleEndian)
// NOTE(review): the nested ternary is gone (computeDataLayout() builds the
// string now), but DL must still be initialized before TLInfo is constructed.
: LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS,
- Options, RM, CM, OL),
+ Options, getEffectiveRelocModel(TT, RM), CM, OL),
TLOF(createTLOF(getTargetTriple())),
- isLittle(LittleEndian) {
+ Subtarget(TT, CPU, FS, *this, LittleEndian) {
+ initReciprocals(*this, Subtarget);
initAsmInfo();
}
AArch64TargetMachine::~AArch64TargetMachine() {}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+namespace {
+struct AArch64GISelActualAccessor : public GISelAccessor { // GlobalISel helper bundle
+ std::unique_ptr<CallLowering> CallLoweringInfo; // owned; exposed by getCallLowering()
+ std::unique_ptr<RegisterBankInfo> RegBankInfo; // owned; exposed by getRegBankInfo()
+ const CallLowering *getCallLowering() const override { // non-owning; null until set
+ return CallLoweringInfo.get();
+ }
+ const RegisterBankInfo *getRegBankInfo() const override { // non-owning; null until set
+ return RegBankInfo.get();
+ }
+};
+} // End anonymous namespace.
+#endif // LLVM_BUILD_GLOBAL_ISEL
+
const AArch64Subtarget *
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
@@ -156,7 +225,18 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
- isLittle);
+ Subtarget.isLittleEndian());
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+ GISelAccessor *GISel = new GISelAccessor();
+#else
+ AArch64GISelActualAccessor *GISel =
+ new AArch64GISelActualAccessor();
+ GISel->CallLoweringInfo.reset(
+ new AArch64CallLowering(*I->getTargetLowering()));
+ GISel->RegBankInfo.reset(
+ new AArch64RegisterBankInfo(*I->getRegisterInfo()));
+#endif
+ I->setGISelAccessor(*GISel);
}
return I.get();
}
@@ -165,16 +245,16 @@ void AArch64leTargetMachine::anchor() { }
AArch64leTargetMachine::AArch64leTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
+ const TargetOptions &Options, Optional<Reloc::Model> RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL)
: AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
void AArch64beTargetMachine::anchor() { }
AArch64beTargetMachine::AArch64beTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
+ const TargetOptions &Options, Optional<Reloc::Model> RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL)
: AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
@@ -194,6 +274,10 @@ public:
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+ bool addIRTranslator() override;
+ bool addRegBankSelect() override;
+#endif
bool addILPOpts() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
@@ -223,6 +307,13 @@ void AArch64PassConfig::addIRPasses() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
addPass(createCFGSimplificationPass());
+ // Run LoopDataPrefetch
+ //
+ // Run this before LSR to remove the multiplies involved in computing the
+ // pointer values N iterations ahead.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch)
+ addPass(createLoopDataPrefetchPass());
+
TargetPassConfig::addIRPasses();
// Match interleaved memory accesses to ldN/stN intrinsics.
@@ -278,6 +369,17 @@ bool AArch64PassConfig::addInstSelector() {
return false;
}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+bool AArch64PassConfig::addIRTranslator() { // adds the GlobalISel IRTranslator pass
+ addPass(new IRTranslator());
+ return false; // NOTE(review): same value addInstSelector() returns; presumably "no error" - confirm
+}
+bool AArch64PassConfig::addRegBankSelect() { // adds the GlobalISel RegBankSelect pass
+ addPass(new RegBankSelect());
+ return false; // NOTE(review): see addIRTranslator(); meaning of false to be confirmed
+}
+#endif // LLVM_BUILD_GLOBAL_ISEL
+
bool AArch64PassConfig::addILPOpts() {
if (EnableCondOpt)
addPass(createAArch64ConditionOptimizerPass());
@@ -303,6 +405,10 @@ void AArch64PassConfig::addPreRegAlloc() {
}
void AArch64PassConfig::addPostRegAlloc() {
+ // Remove redundant copy instructions.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
+ addPass(createAArch64RedundantCopyEliminationPass());
+
// Change dead register definitions to refer to the zero register.
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());