summaryrefslogtreecommitdiff
path: root/lib/Target/NVPTX
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2015-07-05 14:21:36 +0000
committerDimitry Andric <dim@FreeBSD.org>2015-07-05 14:21:36 +0000
commit1a82d4c088707c791c792f6822f611b47a12bdfe (patch)
tree7c411f9b5d807f7f204fdd16965d8925a82b6d18 /lib/Target/NVPTX
parent3a0822f094b578157263e04114075ad7df81db41 (diff)
Notes
Diffstat (limited to 'lib/Target/NVPTX')
-rw-r--r--lib/Target/NVPTX/CMakeLists.txt1
-rw-r--r--lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h2
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h4
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp5
-rw-r--r--lib/Target/NVPTX/ManagedStringPool.h2
-rw-r--r--lib/Target/NVPTX/NVPTX.h11
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h2
-rw-r--r--lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.cpp46
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.h2
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h2
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h2
-rw-r--r--lib/Target/NVPTX/NVPTXImageOptimizer.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXMachineFunctionInfo.h2
-rw-r--r--lib/Target/NVPTX/NVPTXPeephole.cpp154
-rw-r--r--lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.td2
-rw-r--r--lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h2
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.h2
-rw-r--r--lib/Target/NVPTX/NVVMReflect.cpp2
27 files changed, 223 insertions, 51 deletions
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 99e950eba80ff..05fe06dbc07cf 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -22,6 +22,7 @@ set(NVPTXCodeGen_sources
NVPTXLowerAggrCopies.cpp
NVPTXLowerKernelArgs.cpp
NVPTXLowerAlloca.cpp
+ NVPTXPeephole.cpp
NVPTXMCExpr.cpp
NVPTXPrologEpilogPass.cpp
NVPTXRegisterInfo.cpp
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 8144f3fde730c..02c5a210d0997 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -49,6 +49,6 @@ public:
raw_ostream &O, const char *Modifier = nullptr);
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index b55664ed32a77..a72ae2ef53a73 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -94,7 +94,7 @@ enum {
IsSurfTexQueryFlag = 0x800,
IsTexModeUnifiedFlag = 0x1000
};
-} // namespace NVPTXII
-} // namespace llvm
+}
+}
#endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 8a28b089ce357..221d2f093aebf 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -54,7 +54,10 @@ createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->initMCCodeGenInfo(RM, CM, OL);
+
+ // The default relocation model is used regardless of what the client has
+ // specified, as it is the only relocation model currently supported.
+ X->initMCCodeGenInfo(Reloc::Default, CM, OL);
return X;
}
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index 1480b61afdbe7..a2d670f8d39d2 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -43,6 +43,6 @@ public:
}
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index d06d61f5e5507..fe28214e95880 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -71,6 +71,7 @@ MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM);
BasicBlockPass *createNVPTXLowerAllocaPass();
+MachineFunctionPass *createNVPTXPeephole();
bool isImageOrSamplerVal(const Value *, const Module *);
@@ -133,7 +134,7 @@ enum VecType {
V2 = 2,
V4 = 4
};
-} // namespace PTXLdStInstCode
+}
/// PTXCvtMode - Conversion code enumeration
namespace PTXCvtMode {
@@ -152,7 +153,7 @@ enum CvtMode {
FTZ_FLAG = 0x10,
SAT_FLAG = 0x20
};
-} // namespace PTXCvtMode
+}
/// PTXCmpMode - Comparison mode enumeration
namespace PTXCmpMode {
@@ -180,9 +181,9 @@ enum CmpMode {
BASE_MASK = 0xFF,
FTZ_FLAG = 0x100
};
-} // namespace PTXCmpMode
-} // namespace NVPTX
-} // namespace llvm
+}
+}
+} // end namespace llvm;
// Defines symbolic names for NVPTX registers. This defines a mapping from
// register name to register number.
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 1a1a8ca7c6664..cadd7a46cd9d2 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -109,7 +109,7 @@ void VisitGlobalVariableForEmission(
Visited.insert(GV);
Visiting.erase(GV);
}
-} // namespace
+}
void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
if (!EmitLineNumbers)
@@ -826,7 +826,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(TM.getDataLayout());
+ Mang = new Mangler();
// Emit header before any dwarf directives are emitted below.
emitHeader(M, OS1, STI);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 12d80a34a4e85..f6f7685e76f96 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -349,6 +349,6 @@ public:
DebugLoc prevDebugLoc;
void emitLineNumberAsDotLoc(const MachineInstr &);
};
-} // namespace llvm
+} // end of namespace
#endif
diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 2d5e74c4c4bf2..7d4be8e809cf0 100644
--- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -38,7 +38,7 @@ public:
/// \brief Clean up the name to remove symbols invalid in PTX.
std::string cleanUpName(StringRef Name);
};
-} // namespace
+}
char NVPTXAssignValidGlobalNames::ID = 0;
diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index 3eb7024ff08a5..69a229e32f438 100644
--- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -107,7 +107,7 @@ private:
/// Helper function for bitcasts.
Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
};
-} // namespace
+}
char NVPTXFavorNonGenericAddrSpaces::ID = 0;
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 5503494fc3c89..9b34aef3fdec0 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -35,35 +35,33 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
if (MF.getFrameInfo()->hasStackObjects()) {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
- // Insert "mov.u32 %SP, %Depot"
- MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineInstr *MI = MBB.begin();
+ MachineRegisterInfo &MR = MF.getRegInfo();
+
// This instruction really occurs before first instruction
// in the BB, so giving it no debug location.
DebugLoc dl = DebugLoc();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- // mov %SPL, %depot;
- // cvta.local %SP, %SPL;
- if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
- unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
- MachineInstr *MI =
- BuildMI(MBB, MBBI, dl, MF.getSubtarget().getInstrInfo()->get(
- NVPTX::cvta_local_yes_64),
- NVPTX::VRFrame).addReg(LocalReg);
- BuildMI(MBB, MI, dl,
- MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
- LocalReg).addImm(MF.getFunctionNumber());
- } else {
- unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass);
- MachineInstr *MI =
- BuildMI(MBB, MBBI, dl,
- MF.getSubtarget().getInstrInfo()->get(NVPTX::cvta_local_yes),
- NVPTX::VRFrame).addReg(LocalReg);
- BuildMI(MBB, MI, dl,
- MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
- LocalReg).addImm(MF.getFunctionNumber());
+ // Emits
+ // mov %SPL, %depot;
+ // cvta.local %SP, %SPL;
+ // for local address accesses in MF.
+ bool Is64Bit =
+ static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit();
+ unsigned CvtaLocalOpcode =
+ (Is64Bit ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes);
+ unsigned MovDepotOpcode =
+ (Is64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
+ if (!MR.use_empty(NVPTX::VRFrame)) {
+ // If %SP is not used, do not bother emitting "cvta.local %SP, %SPL".
+ MI = BuildMI(MBB, MI, dl,
+ MF.getSubtarget().getInstrInfo()->get(CvtaLocalOpcode),
+ NVPTX::VRFrame)
+ .addReg(NVPTX::VRFrameLocal);
}
+ BuildMI(MBB, MI, dl, MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
+ NVPTX::VRFrameLocal)
+ .addImm(MF.getFunctionNumber());
}
}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 488edecc6e7b8..14f8bb7b98fec 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -31,6 +31,6 @@ public:
MachineBasicBlock::iterator I) const override;
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 5879df31f8a64..fe20580c83a20 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -95,6 +95,6 @@ private:
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
};
-} // namespace
+}
#endif
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b5af72ab855aa..09e0bd5d3d883 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -206,7 +206,14 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
// Turn FP truncstore into trunc + store.
+ // FIXME: vector types should also be expanded
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 276851f872eab..ed94775b3002f 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -427,7 +427,7 @@ enum NodeType : unsigned {
Suld3DV4I16Zero,
Suld3DV4I32Zero
};
-} // namespace NVPTXISD
+}
class NVPTXSubtarget;
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index c86f861acd557..aa36b6be7250d 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -42,7 +42,7 @@ private:
Value *cleanupValue(Value *V);
void replaceWith(Instruction *From, ConstantInt *To);
};
-} // namespace
+}
char NVPTXImageOptimizer::ID = 0;
diff --git a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
index 24dcb122b94eb..b533f316d8a98 100644
--- a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
@@ -132,6 +132,10 @@ void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) {
assert(!Arg->hasByValAttr() &&
"byval params should be handled by handleByValParam");
+ // Do nothing if the argument already points to the global address space.
+ if (Arg->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
+ return;
+
Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin();
Instruction *ArgInGlobal = new AddrSpaceCastInst(
Arg, PointerType::get(Arg->getType()->getPointerElementType(),
diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 4b9322c77a40c..10f1135ad8410 100644
--- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -46,6 +46,6 @@ public:
return ImageHandleList[Idx].c_str();
}
};
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp
new file mode 100644
index 0000000000000..a61c291d233ff
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -0,0 +1,154 @@
+//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// In NVPTX, NVPTXFrameLowering will emit the following instructions at the
+// beginning of a MachineFunction.
+//
+// mov %SPL, %depot
+// cvta.local %SP, %SPL
+//
+// Because Frame Index is a generic address and alloca can only return generic
+// pointer, without this pass the instructions producing alloca'ed address will
+// be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
+// this address with their .local versions, but this may introduce a lot of
+// cvta.to.local instructions. Performance can be improved if we avoid casting
+// address back and forth and directly calculate local address based on %SPL.
+// This peephole pass optimizes these cases.
+//
+// For example, it will transform the following pattern
+// %vreg0<def> = LEA_ADDRi64 %VRFrame, 4
+// %vreg1<def> = cvta_to_local_yes_64 %vreg0
+//
+// into
+// %vreg1<def> = LEA_ADDRi64 %VRFrameLocal, 4
+//
+// %VRFrameLocal is the virtual register name of %SPL
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nvptx-peephole"
+
+namespace llvm {
+void initializeNVPTXPeepholePass(PassRegistry &);
+}
+
+namespace {
+/// Machine-function pass that folds a %VRFrame-based LEA_ADDRi followed by a
+/// cvta.to.local into a single LEA_ADDRi based on %VRFrameLocal.
+struct NVPTXPeephole : public MachineFunctionPass {
+  static char ID;
+
+  NVPTXPeephole() : MachineFunctionPass(ID) {
+    // Make sure the pass is known to the global pass registry.
+    initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
+  }
+
+  /// Human-readable name of this pass.
+  const char *getPassName() const override {
+    return "NVPTX optimize redundant cvta.to.local instruction";
+  }
+
+  /// Adds nothing on top of what MachineFunctionPass already declares.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+}
+
+char NVPTXPeephole::ID = 0;
+
+INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
+
+/// Returns true when \p Root is a cvta.to.local whose source register is a
+/// virtual register uniquely defined, in the same basic block, by a LEA_ADDRi
+/// (32- or 64-bit) that takes %VRFrame as its base operand.
+static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
+  // Only cvta.to.local instructions are of interest.
+  const unsigned RootOpcode = Root.getOpcode();
+  if (RootOpcode != NVPTX::cvta_to_local_yes_64 &&
+      RootOpcode != NVPTX::cvta_to_local_yes)
+    return false;
+
+  // The converted address has to live in a virtual register.
+  const auto &SrcOp = Root.getOperand(1);
+  if (!SrcOp.isReg() || !TargetRegisterInfo::isVirtualRegister(SrcOp.getReg()))
+    return false;
+
+  // That register needs exactly one definition, located in Root's own block.
+  auto &ParentBB = *Root.getParent();
+  const auto &RegInfo = ParentBB.getParent()->getRegInfo();
+  MachineInstr *AddrDef = RegInfo.getUniqueVRegDef(SrcOp.getReg());
+  if (!AddrDef || AddrDef->getParent() != &ParentBB)
+    return false;
+
+  // The definition must be a LEA_ADDRi ...
+  const unsigned DefOpcode = AddrDef->getOpcode();
+  if (DefOpcode != NVPTX::LEA_ADDRi64 && DefOpcode != NVPTX::LEA_ADDRi)
+    return false;
+
+  // ... computed relative to the frame register %VRFrame.
+  const auto &BaseOp = AddrDef->getOperand(1);
+  return BaseOp.isReg() && BaseOp.getReg() == NVPTX::VRFrame;
+}
+
+/// Rewrites the pair "LEA_ADDRi %VRFrame, off ; cvta.to.local" ending at
+/// \p Root into one LEA_ADDRi that is based on %VRFrameLocal and defines
+/// Root's result register. \p Root is always erased; the original LEA_ADDRi
+/// is erased only when Root was its sole non-debug user.
+///
+/// \p Root must have been accepted by isCVTAToLocalCombinationCandidate.
+static void CombineCVTAToLocal(MachineInstr &Root) {
+  MachineBasicBlock &ParentBB = *Root.getParent();
+  MachineFunction &Fn = *ParentBB.getParent();
+  const MachineRegisterInfo &RegInfo = Fn.getRegInfo();
+  const TargetInstrInfo *InstrInfo = Fn.getSubtarget().getInstrInfo();
+
+  // The LEA_ADDRi producing the generic address that Root converts.
+  MachineInstr &AddrDef =
+      *RegInfo.getUniqueVRegDef(Root.getOperand(1).getReg());
+  const unsigned ResultReg = Root.getOperand(0).getReg();
+
+  // Same opcode and offset operand as the old LEA_ADDRi, but relative to
+  // %VRFrameLocal and writing straight into Root's destination.
+  MachineInstrBuilder LocalLea =
+      BuildMI(Fn, Root.getDebugLoc(), InstrInfo->get(AddrDef.getOpcode()),
+              ResultReg)
+          .addReg(NVPTX::VRFrameLocal)
+          .addOperand(AddrDef.getOperand(2));
+  ParentBB.insert(MachineBasicBlock::iterator(&Root), LocalLea);
+
+  // Query the use count while Root is still in place: if Root is the only
+  // non-debug reader of the old address, the old LEA_ADDRi becomes dead.
+  const bool RootIsOnlyUser =
+      RegInfo.hasOneNonDBGUse(AddrDef.getOperand(0).getReg());
+  if (RootIsOnlyUser)
+    AddrDef.eraseFromParentAndMarkDBGValuesForRemoval();
+  Root.eraseFromParentAndMarkDBGValuesForRemoval();
+}
+
+/// Applies the cvta.to.local combination to every candidate instruction of
+/// \p MF, then removes the "%VRFrame = cvta.local %VRFrameLocal" definition
+/// if nothing reads %VRFrame any more.
+///
+/// \returns true if any instruction was rewritten or erased.
+bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+  // Loop over all of the basic blocks.
+  for (auto &MBB : MF) {
+    auto BlockIter = MBB.begin();
+
+    while (BlockIter != MBB.end()) {
+      // Step past MI before combining: CombineCVTAToLocal erases MI.
+      auto &MI = *BlockIter++;
+      if (isCVTAToLocalCombinationCandidate(MI)) {
+        CombineCVTAToLocal(MI);
+        Changed = true;
+      }
+    } // Instruction
+  } // Basic Block
+
+  // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
+  const auto &MRI = MF.getRegInfo();
+  if (MRI.use_empty(NVPTX::VRFrame)) {
+    if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
+      MI->eraseFromParentAndMarkDBGValuesForRemoval();
+      // Deleting the definition modifies the function too, so it has to be
+      // reflected in the return value.
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+/// Factory for the NVPTX peephole pass; returns a newly allocated instance.
+MachineFunctionPass *llvm::createNVPTXPeephole() {
+  return new NVPTXPeephole();
+}
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index ea58f7787489c..5fd69a6815a8f 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -39,7 +39,7 @@ public:
private:
void calculateFrameObjectOffsets(MachineFunction &Fn);
};
-} // namespace
+}
MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
return new NVPTXPrologEpilogPass();
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 3ef997b006fad..6e97f9efbc27c 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -69,7 +69,7 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
}
return "";
}
-} // namespace llvm
+}
NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index efcee6b6f2bd3..ff6ccc457db7b 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -65,5 +65,5 @@ def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>;
def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>;
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot,
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRFrameLocal, VRDepot,
(sequence "ENVREG%u", 0, 31))>;
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index bb0adc59a3fda..e83f735a551e8 100644
--- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -45,7 +45,7 @@ private:
bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF,
unsigned &Idx);
};
-} // namespace
+}
char NVPTXReplaceImageHandles::ID = 0;
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index d4520451d37d8..c7287719be5f3 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -103,6 +103,6 @@ public:
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
};
-} // namespace llvm
+} // End llvm namespace
#endif
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index c071ee82abc68..9d9072efc3821 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -210,6 +210,10 @@ bool NVPTXPassConfig::addInstSelector() {
void NVPTXPassConfig::addPostRegAlloc() {
addPass(createNVPTXPrologEpilogPass(), false);
+ // NVPTXPrologEpilogPass calculates frame object offsets and replaces frame
+ // indices with the VRFrame register. NVPTXPeephole needs to run after that
+ // and will replace VRFrame with VRFrameLocal when possible.
+ addPass(createNVPTXPeephole());
}
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index 4d937c6a8bec5..7e2ce73daaa31 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -91,6 +91,6 @@ void dumpInstRec(Value *v, std::set<Instruction *> *visited);
void dumpInstRec(Value *v);
void dumpParent(Value *v);
-} // namespace llvm
+}
#endif
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index 1c2043069e1e8..5e375b7852e46 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -75,7 +75,7 @@ private:
bool handleFunction(Function *ReflectFunction);
void setVarMap();
};
-} // namespace
+}
ModulePass *llvm::createNVVMReflectPass() {
return new NVVMReflect();