1 files changed, 92 insertions, 37 deletions
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index aa931b134da9..b9f5919964c7 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -44,15 +45,23 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
 
 using namespace llvm;
 
+static cl::opt<bool> UseInferAddressSpaces(
+    "nvptx-use-infer-addrspace", cl::init(false), cl::Hidden,
+    cl::desc("Optimize address spaces using NVPTXInferAddressSpaces instead of "
+             "NVPTXFavorNonGenericAddrSpaces"));
+
 namespace llvm {
+void initializeNVVMIntrRangePass(PassRegistry&);
 void initializeNVVMReflectPass(PassRegistry&);
 void initializeGenericToNVVMPass(PassRegistry&);
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
 void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
 void initializeNVPTXLowerAllocaPass(PassRegistry &);
@@ -67,10 +76,12 @@ extern "C" void LLVMInitializeNVPTXTarget() {
   // but it's very NVPTX-specific.
   PassRegistry &PR = *PassRegistry::getPassRegistry();
   initializeNVVMReflectPass(PR);
+  initializeNVVMIntrRangePass(PR);
   initializeGenericToNVVMPass(PR);
   initializeNVPTXAllocaHoistingPass(PR);
   initializeNVPTXAssignValidGlobalNamesPass(PR);
   initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
+  initializeNVPTXInferAddressSpacesPass(PR);
   initializeNVPTXLowerKernelArgsPass(PR);
   initializeNVPTXLowerAllocaPass(PR);
   initializeNVPTXLowerAggrCopiesPass(PR);
@@ -90,11 +101,15 @@ static std::string computeDataLayout(bool is64Bit) {
 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
                                        StringRef CPU, StringRef FS,
                                        const TargetOptions &Options,
-                                       Reloc::Model RM, CodeModel::Model CM,
+                                       Optional<Reloc::Model> RM,
+                                       CodeModel::Model CM,
                                        CodeGenOpt::Level OL, bool is64bit)
-    : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
-                        CM, OL),
-      is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
+    // The pic relocation model is used regardless of what the client has
+    // specified, as it is the only relocation model currently supported.
+    : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options,
+                        Reloc::PIC_, CM, OL),
+      is64bit(is64bit),
+      TLOF(make_unique<NVPTXTargetObjectFile>()),
       Subtarget(TT, CPU, FS, *this) {
   if (TT.getOS() == Triple::NVCL)
     drvInterface = NVPTX::NVCL;
@@ -110,7 +125,8 @@ void NVPTXTargetMachine32::anchor() {}
 NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
                                            StringRef CPU, StringRef FS,
                                            const TargetOptions &Options,
-                                           Reloc::Model RM, CodeModel::Model CM,
+                                           Optional<Reloc::Model> RM,
+                                           CodeModel::Model CM,
                                            CodeGenOpt::Level OL)
     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
 
@@ -119,7 +135,8 @@ void NVPTXTargetMachine64::anchor() {}
 NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
                                            StringRef CPU, StringRef FS,
                                            const TargetOptions &Options,
-                                           Reloc::Model RM, CodeModel::Model CM,
+                                           Optional<Reloc::Model> RM,
+                                           CodeModel::Model CM,
                                            CodeGenOpt::Level OL)
     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
 
@@ -143,14 +160,25 @@ public:
   void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
 
 private:
-  // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
+  // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
+  // function is only called in opt mode.
   void addEarlyCSEOrGVNPass();
+
+  // Add passes that propagate special memory spaces.
+  void addAddressSpaceInferencePasses();
+
+  // Add passes that perform straight-line scalar optimizations.
+  void addStraightLineScalarOptimizationPasses();
 };
 } // end anonymous namespace
 
 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
-  NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
-  return PassConfig;
+  return new NVPTXPassConfig(this, PM);
+}
+
+void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
+  PM.add(createNVVMReflectPass());
+  PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
 }
 
 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
@@ -166,34 +194,23 @@ void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
     addPass(createEarlyCSEPass());
 }
 
-void NVPTXPassConfig::addIRPasses() {
-  // The following passes are known to not play well with virtual regs hanging
-  // around after register allocation (which in our case, is *all* registers).
-  // We explicitly disable them here.  We do, however, need some functionality
-  // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
-  // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
-  disablePass(&PrologEpilogCodeInserterID);
-  disablePass(&MachineCopyPropagationID);
-  disablePass(&TailDuplicateID);
-
-  addPass(createNVVMReflectPass());
-  addPass(createNVPTXImageOptimizerPass());
-  addPass(createNVPTXAssignValidGlobalNamesPass());
-  addPass(createGenericToNVVMPass());
-
-  // === Propagate special address spaces ===
-  addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
+void NVPTXPassConfig::addAddressSpaceInferencePasses() {
   // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
   // be eliminated by SROA.
   addPass(createSROAPass());
   addPass(createNVPTXLowerAllocaPass());
-  addPass(createNVPTXFavorNonGenericAddrSpacesPass());
-  // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
-  // them unused. We could remove dead code in an ad-hoc manner, but that
-  // requires manual work and might be error-prone.
-  addPass(createDeadCodeEliminationPass());
+  if (UseInferAddressSpaces) {
+    addPass(createNVPTXInferAddressSpacesPass());
+  } else {
+    addPass(createNVPTXFavorNonGenericAddrSpacesPass());
+    // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
+    // them unused. We could remove dead code in an ad-hoc manner, but that
+    // requires manual work and might be error-prone.
+    addPass(createDeadCodeEliminationPass());
+  }
+}
 
-  // === Straight-line scalar optimizations ===
+void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
   addPass(createSeparateConstOffsetFromGEPPass());
   addPass(createSpeculativeExecutionPass());
   // ReassociateGEPs exposes more opportunites for SLSR. See
@@ -208,6 +225,41 @@ void NVPTXPassConfig::addIRPasses() {
   // NaryReassociate on GEPs creates redundant common expressions, so run
   // EarlyCSE after it.
   addPass(createEarlyCSEPass());
+}
+
+void NVPTXPassConfig::addIRPasses() {
+  // The following passes are known to not play well with virtual regs hanging
+  // around after register allocation (which in our case, is *all* registers).
+  // We explicitly disable them here.  We do, however, need some functionality
+  // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
+  // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
+  disablePass(&PrologEpilogCodeInserterID);
+  disablePass(&MachineCopyPropagationID);
+  disablePass(&TailDuplicateID);
+  disablePass(&StackMapLivenessID);
+  disablePass(&LiveDebugValuesID);
+  disablePass(&PostRASchedulerID);
+  disablePass(&FuncletLayoutID);
+  disablePass(&PatchableFunctionID);
+
+  // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
+  // it here does nothing.  But since we need it for correctness when lowering
+  // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
+  // call addEarlyAsPossiblePasses.
+  addPass(createNVVMReflectPass());
+
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createNVPTXImageOptimizerPass());
+  addPass(createNVPTXAssignValidGlobalNamesPass());
+  addPass(createGenericToNVVMPass());
+
+  // NVPTXLowerKernelArgs is required for correctness and should be run right
+  // before the address space inference passes.
+  addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
+  if (getOptLevel() != CodeGenOpt::None) {
+    addAddressSpaceInferencePasses();
+    addStraightLineScalarOptimizationPasses();
+  }
 
   // === LSR and other generic IR passes ===
   TargetPassConfig::addIRPasses();
@@ -223,7 +275,8 @@ void NVPTXPassConfig::addIRPasses() {
   //   %1 = shl %a, 2
   //
   // but EarlyCSE can do neither of them.
-  addEarlyCSEOrGVNPass();
+  if (getOptLevel() != CodeGenOpt::None)
+    addEarlyCSEOrGVNPass();
 }
 
 bool NVPTXPassConfig::addInstSelector() {
@@ -241,10 +294,12 @@ bool NVPTXPassConfig::addInstSelector() {
 
 void NVPTXPassConfig::addPostRegAlloc() {
   addPass(createNVPTXPrologEpilogPass(), false);
-  // NVPTXPrologEpilogPass calculates frame object offset and replace frame
-  // index with VRFrame register. NVPTXPeephole need to be run after that and
-  // will replace VRFrame with VRFrameLocal when possible.
-  addPass(createNVPTXPeephole());
+  if (getOptLevel() != CodeGenOpt::None) {
+    // NVPTXPrologEpilogPass calculates frame object offset and replace frame
+    // index with VRFrame register. NVPTXPeephole need to be run after that and
+    // will replace VRFrame with VRFrameLocal when possible.
+    addPass(createNVPTXPeephole());
+  }
 }
 
 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {