aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp')
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp133
1 files changed, 66 insertions, 67 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 3a59306c4998..b1d842122060 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -45,7 +45,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
@@ -329,7 +328,7 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
- const TargetLowering *TLI = STI.getTargetLowering();
+ const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
Type *Ty = F->getReturnType();
@@ -363,7 +362,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
unsigned totalsz = DL.getTypeAllocSize(Ty);
unsigned retAlignment = 0;
if (!getAlign(*F, 0, retAlignment))
- retAlignment = DL.getABITypeAlignment(Ty);
+ retAlignment = TLI->getFunctionParamOptimizedAlign(F, Ty, DL).value();
O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
<< "]";
} else
@@ -513,7 +512,7 @@ void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
OutStreamer->AddComment(Twine("implicit-def: ") +
STI.getRegisterInfo()->getName(RegNo));
}
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
@@ -818,9 +817,13 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
"Missed a global variable");
assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
+ const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+ const NVPTXSubtarget &STI =
+ *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
+
// Print out module-level global variables in proper order
for (unsigned i = 0, e = Globals.size(); i != e; ++i)
- printModuleLevelGV(Globals[i], OS2);
+ printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI);
OS2 << '\n';
@@ -888,17 +891,18 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
clearAnnotationCache(&M);
- // Close the last emitted section
- if (HasDebugInfo) {
- static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
- ->closeLastSection();
- // Emit empty .debug_loc section for better support of the empty files.
- OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
- }
+ if (auto *TS = static_cast<NVPTXTargetStreamer *>(
+ OutStreamer->getTargetStreamer())) {
+ // Close the last emitted section
+ if (HasDebugInfo) {
+ TS->closeLastSection();
+ // Emit empty .debug_loc section for better support of the empty files.
+ OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
+ }
- // Output last DWARF .file directives, if any.
- static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
- ->outputDwarfFileDirectives();
+ // Output last DWARF .file directives, if any.
+ TS->outputDwarfFileDirectives();
+ }
return ret;
@@ -957,8 +961,8 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
}
void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
- raw_ostream &O,
- bool processDemoted) {
+ raw_ostream &O, bool processDemoted,
+ const NVPTXSubtarget &STI) {
// Skip meta data
if (GVar->hasSection()) {
if (GVar->getSection() == "llvm.metadata")
@@ -1001,7 +1005,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// (extern) declarations, no definition or initializer
// Currently the only known declaration is for an automatic __local
// (.shared) promoted to global.
- emitPTXGlobalVariable(GVar, O);
+ emitPTXGlobalVariable(GVar, O, STI);
O << ";\n";
return;
}
@@ -1095,6 +1099,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
emitPTXAddressSpace(PTy->getAddressSpace(), O);
if (isManaged(*GVar)) {
+ if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
+ report_fatal_error(
+ ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
+ }
O << " .attribute(.managed)";
}
@@ -1214,9 +1222,13 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
std::vector<const GlobalVariable *> &gvars = localDecls[f];
+ const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+ const NVPTXSubtarget &STI =
+ *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
+
for (const GlobalVariable *GV : gvars) {
O << "\t// demoted variable\n\t";
- printModuleLevelGV(GV, O, true);
+ printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
}
}
@@ -1282,7 +1294,8 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
}
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
- raw_ostream &O) {
+ raw_ostream &O,
+ const NVPTXSubtarget &STI) {
const DataLayout &DL = getDataLayout();
// GlobalVariables are always constant pointers themselves.
@@ -1290,6 +1303,13 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
O << ".";
emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
+ if (isManaged(*GVar)) {
+ if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
+ report_fatal_error(
+ ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
+ }
+ O << " .attribute(.managed)";
+ }
if (MaybeAlign A = GVar->getAlign())
O << " .align " << A->value();
else
@@ -1335,34 +1355,6 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
}
}
-static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
- if (Ty->isSingleValueType())
- return DL.getPrefTypeAlignment(Ty);
-
- auto *ATy = dyn_cast<ArrayType>(Ty);
- if (ATy)
- return getOpenCLAlignment(DL, ATy->getElementType());
-
- auto *STy = dyn_cast<StructType>(Ty);
- if (STy) {
- unsigned int alignStruct = 1;
- // Go through each element of the struct and find the
- // largest alignment.
- for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
- Type *ETy = STy->getElementType(i);
- unsigned int align = getOpenCLAlignment(DL, ETy);
- if (align > alignStruct)
- alignStruct = align;
- }
- return alignStruct;
- }
-
- auto *FTy = dyn_cast<FunctionType>(Ty);
- if (FTy)
- return DL.getPointerPrefAlignment().value();
- return DL.getPrefTypeAlignment(Ty);
-}
-
void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
int paramIndex, raw_ostream &O) {
getSymbol(I->getParent())->print(O, MAI);
@@ -1373,7 +1365,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
const AttributeList &PAL = F->getAttributes();
const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
- const TargetLowering *TLI = STI.getTargetLowering();
+ const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
+
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
@@ -1430,18 +1423,24 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
}
+ auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
+ paramIndex](Type *Ty) -> Align {
+ Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
+ MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
+ return std::max(TypeAlign, ParamAlign.valueOrOne());
+ };
+
if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
// Just print .param .align <a> .b8 .param[size];
- // <a> = PAL.getparamalignment
+ // <a> = optimal alignment for the element type; always a multiple of
+ // PAL.getParamAlignment
// size = typeallocsize of element type
- const Align align = DL.getValueOrABITypeAlignment(
- PAL.getParamAlignment(paramIndex), Ty);
+ Align OptimalAlign = getOptimalAlignForParam(Ty);
- unsigned sz = DL.getTypeAllocSize(Ty);
- O << "\t.param .align " << align.value() << " .b8 ";
+ O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
printParamName(I, paramIndex, O);
- O << "[" << sz << "]";
+ O << "[" << DL.getTypeAllocSize(Ty) << "]";
continue;
}
@@ -1454,7 +1453,6 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
NVPTX::CUDA) {
- Type *ETy = PTy->getPointerElementType();
int addrSpace = PTy->getAddressSpace();
switch (addrSpace) {
default:
@@ -1470,7 +1468,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << ".ptr .global ";
break;
}
- O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
+ Align ParamAlign = I->getParamAlign().valueOrOne();
+ O << ".align " << ParamAlign.value() << " ";
}
printParamName(I, paramIndex, O);
continue;
@@ -1511,17 +1510,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
continue;
}
- // param has byVal attribute. So should be a pointer
- auto *PTy = dyn_cast<PointerType>(Ty);
- assert(PTy && "Param with byval attribute should be a pointer type");
- Type *ETy = PTy->getPointerElementType();
+ // param has byVal attribute.
+ Type *ETy = PAL.getParamByValType(paramIndex);
+ assert(ETy && "Param should have byval type");
if (isABI || isKernelFunc) {
// Just print .param .align <a> .b8 .param[size];
- // <a> = PAL.getparamalignment
+ // <a> = optimal alignment for the element type; always a multiple of
+ // PAL.getParamAlignment
// size = typeallocsize of element type
- Align align =
- DL.getValueOrABITypeAlignment(PAL.getParamAlignment(paramIndex), ETy);
+ Align OptimalAlign = getOptimalAlignForParam(ETy);
+
// Work around a bug in ptxas. When PTX code takes address of
// byval parameter with alignment < 4, ptxas generates code to
// spill argument into memory. Alas on sm_50+ ptxas generates
@@ -1533,10 +1532,10 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// TODO: this will need to be undone when we get to support multi-TU
// device-side compilation as it breaks ABI compatibility with nvcc.
// Hopefully ptxas bug is fixed by then.
- if (!isKernelFunc && align < Align(4))
- align = Align(4);
+ if (!isKernelFunc && OptimalAlign < Align(4))
+ OptimalAlign = Align(4);
unsigned sz = DL.getTypeAllocSize(ETy);
- O << "\t.param .align " << align.value() << " .b8 ";
+ O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
continue;