diff options
Diffstat (limited to 'lib/Target/NVPTX/NVPTXTargetMachine.cpp')
-rw-r--r-- | lib/Target/NVPTX/NVPTXTargetMachine.cpp | 129 |
1 files changed, 92 insertions, 37 deletions
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index aa931b134da9..b9f5919964c7 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" @@ -44,15 +45,23 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" using namespace llvm; +static cl::opt<bool> UseInferAddressSpaces( + "nvptx-use-infer-addrspace", cl::init(false), cl::Hidden, + cl::desc("Optimize address spaces using NVPTXInferAddressSpaces instead of " + "NVPTXFavorNonGenericAddrSpaces")); + namespace llvm { +void initializeNVVMIntrRangePass(PassRegistry&); void initializeNVVMReflectPass(PassRegistry&); void initializeGenericToNVVMPass(PassRegistry&); void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); +void initializeNVPTXInferAddressSpacesPass(PassRegistry &); void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); void initializeNVPTXLowerKernelArgsPass(PassRegistry &); void initializeNVPTXLowerAllocaPass(PassRegistry &); @@ -67,10 +76,12 @@ extern "C" void LLVMInitializeNVPTXTarget() { // but it's very NVPTX-specific. PassRegistry &PR = *PassRegistry::getPassRegistry(); initializeNVVMReflectPass(PR); + initializeNVVMIntrRangePass(PR); initializeGenericToNVVMPass(PR); initializeNVPTXAllocaHoistingPass(PR); initializeNVPTXAssignValidGlobalNamesPass(PR); initializeNVPTXFavorNonGenericAddrSpacesPass(PR); + initializeNVPTXInferAddressSpacesPass(PR); initializeNVPTXLowerKernelArgsPass(PR); initializeNVPTXLowerAllocaPass(PR); initializeNVPTXLowerAggrCopiesPass(PR); @@ -90,11 +101,15 @@ static std::string computeDataLayout(bool is64Bit) { NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit) - : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, - CM, OL), - is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()), + // The pic relocation model is used regardless of what the client has + // specified, as it is the only relocation model currently supported. + : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, + Reloc::PIC_, CM, OL), + is64bit(is64bit), + TLOF(make_unique<NVPTXTargetObjectFile>()), Subtarget(TT, CPU, FS, *this) { if (TT.getOS() == Triple::NVCL) drvInterface = NVPTX::NVCL; @@ -110,7 +125,8 @@ void NVPTXTargetMachine32::anchor() {} NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL) : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} @@ -119,7 +135,8 @@ void NVPTXTargetMachine64::anchor() {} NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL) : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} @@ -143,14 +160,25 @@ public: void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; private: - // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE. + // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This + // function is only called in opt mode. void addEarlyCSEOrGVNPass(); + + // Add passes that propagate special memory spaces. + void addAddressSpaceInferencePasses(); + + // Add passes that perform straight-line scalar optimizations. + void addStraightLineScalarOptimizationPasses(); }; } // end anonymous namespace TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { - NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); - return PassConfig; + return new NVPTXPassConfig(this, PM); +} + +void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) { + PM.add(createNVVMReflectPass()); + PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion())); } TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { @@ -166,34 +194,23 @@ void NVPTXPassConfig::addEarlyCSEOrGVNPass() { addPass(createEarlyCSEPass()); } -void NVPTXPassConfig::addIRPasses() { - // The following passes are known to not play well with virtual regs hanging - // around after register allocation (which in our case, is *all* registers). - // We explicitly disable them here. We do, however, need some functionality - // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the - // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). - disablePass(&PrologEpilogCodeInserterID); - disablePass(&MachineCopyPropagationID); - disablePass(&TailDuplicateID); - - addPass(createNVVMReflectPass()); - addPass(createNVPTXImageOptimizerPass()); - addPass(createNVPTXAssignValidGlobalNamesPass()); - addPass(createGenericToNVVMPass()); - - // === Propagate special address spaces === - addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); +void NVPTXPassConfig::addAddressSpaceInferencePasses() { // NVPTXLowerKernelArgs emits alloca for byval parameters which can often // be eliminated by SROA. addPass(createSROAPass()); addPass(createNVPTXLowerAllocaPass()); - addPass(createNVPTXFavorNonGenericAddrSpacesPass()); - // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave - // them unused. We could remove dead code in an ad-hoc manner, but that - // requires manual work and might be error-prone. - addPass(createDeadCodeEliminationPass()); + if (UseInferAddressSpaces) { + addPass(createNVPTXInferAddressSpacesPass()); + } else { + addPass(createNVPTXFavorNonGenericAddrSpacesPass()); + // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave + // them unused. We could remove dead code in an ad-hoc manner, but that + // requires manual work and might be error-prone. + addPass(createDeadCodeEliminationPass()); + } +} - // === Straight-line scalar optimizations === +void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() { addPass(createSeparateConstOffsetFromGEPPass()); addPass(createSpeculativeExecutionPass()); // ReassociateGEPs exposes more opportunites for SLSR. See @@ -208,6 +225,41 @@ void NVPTXPassConfig::addIRPasses() { // NaryReassociate on GEPs creates redundant common expressions, so run // EarlyCSE after it. addPass(createEarlyCSEPass()); +} + +void NVPTXPassConfig::addIRPasses() { + // The following passes are known to not play well with virtual regs hanging + // around after register allocation (which in our case, is *all* registers). + // We explicitly disable them here. We do, however, need some functionality + // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the + // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). + disablePass(&PrologEpilogCodeInserterID); + disablePass(&MachineCopyPropagationID); + disablePass(&TailDuplicateID); + disablePass(&StackMapLivenessID); + disablePass(&LiveDebugValuesID); + disablePass(&PostRASchedulerID); + disablePass(&FuncletLayoutID); + disablePass(&PatchableFunctionID); + + // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running + // it here does nothing. But since we need it for correctness when lowering + // to NVPTX, run it here too, in case whoever built our pass pipeline didn't + // call addEarlyAsPossiblePasses. + addPass(createNVVMReflectPass()); + + if (getOptLevel() != CodeGenOpt::None) + addPass(createNVPTXImageOptimizerPass()); + addPass(createNVPTXAssignValidGlobalNamesPass()); + addPass(createGenericToNVVMPass()); + + // NVPTXLowerKernelArgs is required for correctness and should be run right + // before the address space inference passes. + addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); + if (getOptLevel() != CodeGenOpt::None) { + addAddressSpaceInferencePasses(); + addStraightLineScalarOptimizationPasses(); + } // === LSR and other generic IR passes === TargetPassConfig::addIRPasses(); @@ -223,7 +275,8 @@ void NVPTXPassConfig::addIRPasses() { // %1 = shl %a, 2 // // but EarlyCSE can do neither of them. - addEarlyCSEOrGVNPass(); + if (getOptLevel() != CodeGenOpt::None) + addEarlyCSEOrGVNPass(); } bool NVPTXPassConfig::addInstSelector() { @@ -241,10 +294,12 @@ bool NVPTXPassConfig::addInstSelector() { void NVPTXPassConfig::addPostRegAlloc() { addPass(createNVPTXPrologEpilogPass(), false); - // NVPTXPrologEpilogPass calculates frame object offset and replace frame - // index with VRFrame register. NVPTXPeephole need to be run after that and - // will replace VRFrame with VRFrameLocal when possible. - addPass(createNVPTXPeephole()); + if (getOptLevel() != CodeGenOpt::None) { + // NVPTXPrologEpilogPass calculates frame object offset and replace frame + // index with VRFrame register. NVPTXPeephole need to be run after that and + // will replace VRFrame with VRFrameLocal when possible. + addPass(createNVPTXPeephole()); + } } FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { |