summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/NVPTX
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2020-07-26 19:36:28 +0000
committerDimitry Andric <dim@FreeBSD.org>2020-07-26 19:36:28 +0000
commitcfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Target/NVPTX
parent706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Notes
Diffstat (limited to 'llvm/lib/Target/NVPTX')
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h1
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h5
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp6
-rw-r--r--llvm/lib/Target/NVPTX/NVPTX.h1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTX.td7
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp96
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h13
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp7
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXFrameLowering.h5
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp164
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.h7
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp4
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp6
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp35
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp10
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp9
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp14
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h11
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXUtilities.cpp6
24 files changed, 202 insertions, 215 deletions
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 7e1da9b7a94b9..aef0eed6ab9a4 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -51,4 +51,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple,
// @TODO: Can we just disable this?
WeakDirective = "\t// .weak\t";
GlobalDirective = "\t// .globl\t";
+
+ UseIntegratedAssembler = false;
}
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index ce5ca99c53970..77c4daea2b6ab 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -16,7 +16,6 @@
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
-class Target;
class Triple;
class NVPTXMCAsmInfo : public MCAsmInfo {
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index e1691d2384e6f..b394566edd0df 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -15,11 +15,6 @@
#include <stdint.h>
-namespace llvm {
-class Target;
-
-} // End llvm namespace
-
// Defines symbolic names for PTX registers.
#define GET_REGINFO_ENUM
#include "NVPTXGenRegisterInfo.inc"
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
index 17f5ba7d900bd..cdb70ff1f9739 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
@@ -26,13 +26,13 @@ NVPTXTargetStreamer::~NVPTXTargetStreamer() = default;
void NVPTXTargetStreamer::outputDwarfFileDirectives() {
for (const std::string &S : DwarfFiles)
- getStreamer().EmitRawText(S.data());
+ getStreamer().emitRawText(S.data());
DwarfFiles.clear();
}
void NVPTXTargetStreamer::closeLastSection() {
if (HasSections)
- getStreamer().EmitRawText("\t}");
+ getStreamer().emitRawText("\t}");
}
void NVPTXTargetStreamer::emitDwarfFileDirective(StringRef Directive) {
@@ -128,7 +128,7 @@ void NVPTXTargetStreamer::emitRawBytes(StringRef Data) {
if (Label == Directive)
Label = ",";
}
- Streamer.EmitRawText(OS.str());
+ Streamer.emitRawText(OS.str());
}
#endif
}
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 0acbace5f848f..dfe0b9cb5ee6d 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -21,7 +21,6 @@ namespace llvm {
class NVPTXTargetMachine;
class FunctionPass;
class MachineFunctionPass;
-class formatted_raw_ostream;
namespace NVPTXCC {
enum CondCodes {
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index 1d947ef1ce623..2b39e9f412f76 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -55,6 +55,8 @@ def SM72 : SubtargetFeature<"sm_72", "SmVersion", "72",
"Target SM 7.2">;
def SM75 : SubtargetFeature<"sm_75", "SmVersion", "75",
"Target SM 7.5">;
+def SM80 : SubtargetFeature<"sm_80", "SmVersion", "80",
+ "Target SM 8.0">;
// PTX Versions
def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
@@ -77,6 +79,10 @@ def PTX63 : SubtargetFeature<"ptx63", "PTXVersion", "63",
"Use PTX version 6.3">;
def PTX64 : SubtargetFeature<"ptx64", "PTXVersion", "64",
"Use PTX version 6.4">;
+def PTX65 : SubtargetFeature<"ptx65", "PTXVersion", "65",
+ "Use PTX version 6.5">;
+def PTX70 : SubtargetFeature<"ptx70", "PTXVersion", "70",
+ "Use PTX version 7.0">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -100,6 +106,7 @@ def : Proc<"sm_62", [SM62, PTX50]>;
def : Proc<"sm_70", [SM70, PTX60]>;
def : Proc<"sm_72", [SM72, PTX61]>;
def : Proc<"sm_75", [SM75, PTX63]>;
+def : Proc<"sm_80", [SM80, PTX70]>;
def NVPTXInstrInfo : InstrInfo {
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 7117438dc503f..da1a398a68f0d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -141,7 +141,7 @@ VisitGlobalVariableForEmission(const GlobalVariable *GV,
Visiting.erase(GV);
}
-void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCInst Inst;
lowerToMCInst(MI, Inst);
EmitToStreamer(*OutStreamer, Inst);
@@ -434,13 +434,13 @@ bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
return false;
}
-void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
- AsmPrinter::EmitBasicBlockStart(MBB);
+void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
+ AsmPrinter::emitBasicBlockStart(MBB);
if (isLoopHeaderOfNoUnroll(MBB))
- OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
+ OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
}
-void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
+void NVPTXAsmPrinter::emitFunctionEntryLabel() {
SmallString<128> Str;
raw_svector_ostream O(Str);
@@ -467,11 +467,11 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
if (isKernelFunction(*F))
emitKernelFunctionDirectives(*F, O);
- OutStreamer->EmitRawText(O.str());
+ OutStreamer->emitRawText(O.str());
VRegMapping.clear();
// Emit open brace for function body.
- OutStreamer->EmitRawText(StringRef("{\n"));
+ OutStreamer->emitRawText(StringRef("{\n"));
setAndEmitFunctionVirtualRegisters(*MF);
// Emit initial .loc debug directive for correct relocation symbol data.
if (MMI && MMI->hasDebugInfo())
@@ -485,18 +485,18 @@ bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
// debug labels/data after the last basic block.
// We need to emit the closing brace here because we don't have function that
// finished emission of the function body.
- OutStreamer->EmitRawText(StringRef("}\n"));
+ OutStreamer->emitRawText(StringRef("}\n"));
return Result;
}
-void NVPTXAsmPrinter::EmitFunctionBodyStart() {
+void NVPTXAsmPrinter::emitFunctionBodyStart() {
SmallString<128> Str;
raw_svector_ostream O(Str);
emitDemotedVars(&MF->getFunction(), O);
- OutStreamer->EmitRawText(O.str());
+ OutStreamer->emitRawText(O.str());
}
-void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
+void NVPTXAsmPrinter::emitFunctionBodyEnd() {
VRegMapping.clear();
}
@@ -762,13 +762,21 @@ static bool isEmptyXXStructor(GlobalVariable *GV) {
return InitList->getNumOperands() == 0;
}
-bool NVPTXAsmPrinter::doInitialization(Module &M) {
+void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
// Construct a default subtarget off of the TargetMachine defaults. The
// rest of NVPTX isn't friendly to change subtargets per function and
// so the default TargetMachine will have all of the options.
const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
+ SmallString<128> Str1;
+ raw_svector_ostream OS1(Str1);
+
+ // Emit header before any dwarf directives are emitted below.
+ emitHeader(M, OS1, *STI);
+ OutStreamer->emitRawText(OS1.str());
+}
+bool NVPTXAsmPrinter::doInitialization(Module &M) {
if (M.alias_size()) {
report_fatal_error("Module has aliases, which NVPTX does not support.");
return true; // error
@@ -784,26 +792,9 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
return true; // error
}
- SmallString<128> Str1;
- raw_svector_ostream OS1(Str1);
-
// We need to call the parent's one explicitly.
bool Result = AsmPrinter::doInitialization(M);
- // Emit header before any dwarf directives are emitted below.
- emitHeader(M, OS1, *STI);
- OutStreamer->EmitRawText(OS1.str());
-
- // Emit module-level inline asm if it exists.
- if (!M.getModuleInlineAsm().empty()) {
- OutStreamer->AddComment("Start of file scope inline assembly");
- OutStreamer->AddBlankLine();
- OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm()));
- OutStreamer->AddBlankLine();
- OutStreamer->AddComment("End of file scope inline assembly");
- OutStreamer->AddBlankLine();
- }
-
GlobalsEmitted = false;
return Result;
@@ -838,7 +829,7 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
OS2 << '\n';
- OutStreamer->EmitRawText(OS2.str());
+ OutStreamer->emitRawText(OS2.str());
}
void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
@@ -929,7 +920,7 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
->closeLastSection();
// Emit empty .debug_loc section for better support of the empty files.
- OutStreamer->EmitRawText("\t.section\t.debug_loc\t{\t}");
+ OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
}
// Output last DWARF .file directives, if any.
@@ -982,7 +973,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
msg.append("Error: ");
msg.append("Symbol ");
if (V->hasName())
- msg.append(V->getName());
+ msg.append(std::string(V->getName()));
msg.append("has unsupported appending linkage type");
llvm_unreachable(msg.c_str());
} else if (!V->hasInternalLinkage() &&
@@ -1184,7 +1175,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
case Type::IntegerTyID: // Integers larger than 64 bits
case Type::StructTyID:
case Type::ArrayTyID:
- case Type::VectorTyID:
+ case Type::FixedVectorTyID:
ElementSize = DL.getTypeStoreSize(ETy);
// Ptx allows variable initilization only for constant and
// global state spaces.
@@ -1358,7 +1349,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
switch (ETy->getTypeID()) {
case Type::StructTyID:
case Type::ArrayTyID:
- case Type::VectorTyID:
+ case Type::FixedVectorTyID:
ElementSize = DL.getTypeStoreSize(ETy);
O << " .b8 ";
getSymbol(GVar)->print(O, MAI);
@@ -1439,7 +1430,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isKernelFunction(*F)) {
if (isSampler(*I) || isImage(*I)) {
if (isImage(*I)) {
- std::string sname = I->getName();
+ std::string sname = std::string(I->getName());
if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
if (hasImageHandles)
O << "\t.param .u64 .ptr .surfref ";
@@ -1634,8 +1625,8 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
const MachineFrameInfo &MFI = MF.getFrameInfo();
int NumBytes = (int) MFI.getStackSize();
if (NumBytes) {
- O << "\t.local .align " << MFI.getMaxAlignment() << " .b8 \t" << DEPOTNAME
- << getFunctionNumber() << "[" << NumBytes << "];\n";
+ O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
+ << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
O << "\t.reg .b64 \t%SP;\n";
O << "\t.reg .b64 \t%SPL;\n";
@@ -1684,7 +1675,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
}
}
- OutStreamer->EmitRawText(O.str());
+ OutStreamer->emitRawText(O.str());
}
void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
@@ -1815,7 +1806,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
aggBuffer->addBytes(ptr, 4, Bytes);
break;
} else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
+ if (const auto *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstant(Cexpr, DL))) {
int int32 = (int)(constInt->getZExtValue());
ConvertIntToBytes<>(ptr, int32);
@@ -1837,7 +1828,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
aggBuffer->addBytes(ptr, 8, Bytes);
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
+ if (const auto *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstant(Cexpr, DL))) {
long long int64 = (long long)(constInt->getZExtValue());
ConvertIntToBytes<>(ptr, int64);
@@ -1892,7 +1883,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
}
case Type::ArrayTyID:
- case Type::VectorTyID:
+ case Type::FixedVectorTyID:
case Type::StructTyID: {
if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
int ElementSize = DL.getTypeAllocSize(CPV->getType());
@@ -1993,23 +1984,22 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
}
switch (CE->getOpcode()) {
- default:
+ default: {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstant(CE, getDataLayout()))
- if (C && C != CE)
- return lowerConstantForGV(C, ProcessingGeneric);
+ Constant *C = ConstantFoldConstant(CE, getDataLayout());
+ if (C != CE)
+ return lowerConstantForGV(C, ProcessingGeneric);
// Otherwise report the problem to the user.
- {
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/false,
- !MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(OS.str());
- }
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/false,
+ !MF ? nullptr : MF->getFunction().getParent());
+ report_fatal_error(OS.str());
+ }
case Instruction::AddrSpaceCast: {
// Strip the addrspacecast and pass along the operand
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 7a66854d32f4b..5c3a4eb470c1c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -32,7 +32,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -200,13 +200,14 @@ private:
const Function *F;
std::string CurrentFnName;
- void EmitBasicBlockStart(const MachineBasicBlock &MBB) override;
- void EmitFunctionEntryLabel() override;
- void EmitFunctionBodyStart() override;
- void EmitFunctionBodyEnd() override;
+ void emitStartOfAsmFile(Module &M) override;
+ void emitBasicBlockStart(const MachineBasicBlock &MBB) override;
+ void emitFunctionEntryLabel() override;
+ void emitFunctionBodyStart() override;
+ void emitFunctionBodyEnd() override;
void emitImplicitDef(const MachineInstr *MI) const override;
- void EmitInstruction(const MachineInstr *) override;
+ void emitInstruction(const MachineInstr *) override;
void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
MCOperand GetSymbolRef(const MCSymbol *Symbol);
diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index d26912f47e501..c533921842e48 100644
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -65,7 +65,7 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
int NVPTXFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
- unsigned &FrameReg) const {
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
FrameReg = NVPTX::VRDepot;
return MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
@@ -83,3 +83,8 @@ MachineBasicBlock::iterator NVPTXFrameLowering::eliminateCallFramePseudoInstr(
// ADJCALLSTACKUP instructions.
return MBB.erase(I);
}
+
+TargetFrameLowering::DwarfFrameBase
+NVPTXFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
+ return {DwarfFrameBase::CFA, {0}};
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
index 40269f58f06e7..e4c2b9e77f709 100644
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
-class NVPTXSubtarget;
+
class NVPTXFrameLowering : public TargetFrameLowering {
public:
explicit NVPTXFrameLowering();
@@ -25,11 +25,12 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
+ Register &FrameReg) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
+ DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index b36d9b2e240a3..9078ff8cfb975 100644
--- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -144,7 +144,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
// variable initializers, as other uses have been already been removed
// while walking through the instructions in function definitions.
GV->replaceAllUsesWith(BitCastNewGV);
- std::string Name = GV->getName();
+ std::string Name = std::string(GV->getName());
GV->eraseFromParent();
NewGV->setName(Name);
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 92f71c687c461..f45cc06e0a0a3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -31,7 +31,6 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -88,11 +87,6 @@ static cl::opt<bool> UsePrecSqrtF32(
cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
cl::init(true));
-static cl::opt<bool> FtzEnabled(
- "nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
- cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
- cl::init(false));
-
int NVPTXTargetLowering::getDivF32Level() const {
if (UsePrecDivF32.getNumOccurrences() > 0) {
// If nvptx-prec-div32=N is used on the command-line, always honor it
@@ -117,18 +111,8 @@ bool NVPTXTargetLowering::usePrecSqrtF32() const {
}
bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const {
- // TODO: Get rid of this flag; there can be only one way to do this.
- if (FtzEnabled.getNumOccurrences() > 0) {
- // If nvptx-f32ftz is used on the command-line, always honor it
- return FtzEnabled;
- } else {
- const Function &F = MF.getFunction();
- // Otherwise, check for an nvptx-f32ftz attribute on the function
- if (F.hasFnAttribute("nvptx-f32ftz"))
- return F.getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
- else
- return false;
- }
+ return MF.getDenormalMode(APFloat::IEEEsingle()).Output ==
+ DenormalMode::PreserveSign;
}
static bool IsPTXVectorType(MVT VT) {
@@ -233,11 +217,10 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
// covered by the vector op. Otherwise, it returns 1.
static unsigned CanMergeParamLoadStoresStartingAt(
unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
- const SmallVectorImpl<uint64_t> &Offsets, unsigned ParamAlignment) {
- assert(isPowerOf2_32(AccessSize) && "must be a power of 2!");
+ const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) {
// Can't vectorize if param alignment is not sufficient.
- if (AccessSize > ParamAlignment)
+ if (ParamAlignment < AccessSize)
return 1;
// Can't vectorize if offset is not aligned.
if (Offsets[Idx] & (AccessSize - 1))
@@ -297,7 +280,7 @@ enum ParamVectorizationFlags {
static SmallVector<ParamVectorizationFlags, 16>
VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
const SmallVectorImpl<uint64_t> &Offsets,
- unsigned ParamAlignment) {
+ Align ParamAlignment) {
// Set vector size to match ValueVTs and mark all elements as
// scalars by default.
SmallVector<ParamVectorizationFlags, 16> VectorInfo;
@@ -1258,8 +1241,8 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
std::string NVPTXTargetLowering::getPrototype(
const DataLayout &DL, Type *retTy, const ArgListTy &Args,
- const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
- ImmutableCallSite CS) const {
+ const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment,
+ const CallBase &CB) const {
auto PtrVT = getPointerTy(DL);
bool isABI = (STI.getSmVersion() >= 20);
@@ -1294,8 +1277,8 @@ std::string NVPTXTargetLowering::getPrototype(
O << ".param .b" << PtrVT.getSizeInBits() << " _";
} else if (retTy->isAggregateType() || retTy->isVectorTy() ||
retTy->isIntegerTy(128)) {
- O << ".param .align " << retAlignment << " .b8 _["
- << DL.getTypeAllocSize(retTy) << "]";
+ O << ".param .align " << (retAlignment ? retAlignment->value() : 0)
+ << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]";
} else {
llvm_unreachable("Unknown return type");
}
@@ -1316,7 +1299,7 @@ std::string NVPTXTargetLowering::getPrototype(
if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
unsigned align = 0;
- const CallInst *CallI = cast<CallInst>(CS.getInstruction());
+ const CallInst *CallI = cast<CallInst>(&CB);
// +1 because index 0 is reserved for return type alignment
if (!getAlign(*CallI, i + 1, align))
align = DL.getABITypeAlignment(Ty);
@@ -1358,9 +1341,9 @@ std::string NVPTXTargetLowering::getPrototype(
assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
- unsigned align = Outs[OIdx].Flags.getByValAlign();
+ Align align = Outs[OIdx].Flags.getNonZeroByValAlign();
unsigned sz = DL.getTypeAllocSize(ETy);
- O << ".param .align " << align << " .b8 ";
+ O << ".param .align " << align.value() << " .b8 ";
O << "_";
O << "[" << sz << "]";
}
@@ -1368,31 +1351,29 @@ std::string NVPTXTargetLowering::getPrototype(
return O.str();
}
-unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
- ImmutableCallSite CS,
- Type *Ty, unsigned Idx,
- const DataLayout &DL) const {
- if (!CS) {
+Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
+ const CallBase *CB, Type *Ty,
+ unsigned Idx,
+ const DataLayout &DL) const {
+ if (!CB) {
// CallSite is zero, fallback to ABI type alignment
- return DL.getABITypeAlignment(Ty);
+ return DL.getABITypeAlign(Ty);
}
- unsigned Align = 0;
- const Value *DirectCallee = CS.getCalledFunction();
+ unsigned Alignment = 0;
+ const Function *DirectCallee = CB->getCalledFunction();
if (!DirectCallee) {
// We don't have a direct function symbol, but that may be because of
// constant cast instructions in the call.
- const Instruction *CalleeI = CS.getInstruction();
- assert(CalleeI && "Call target is not a function or derived value?");
// With bitcast'd call targets, the instruction will be the call
- if (isa<CallInst>(CalleeI)) {
+ if (const auto *CI = dyn_cast<CallInst>(CB)) {
// Check if we have call alignment metadata
- if (getAlign(*cast<CallInst>(CalleeI), Idx, Align))
- return Align;
+ if (getAlign(*CI, Idx, Alignment))
+ return Align(Alignment);
- const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
+ const Value *CalleeV = CI->getCalledOperand();
// Ignore any bitcast instructions
while (isa<ConstantExpr>(CalleeV)) {
const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
@@ -1404,20 +1385,20 @@ unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
// We have now looked past all of the bitcasts. Do we finally have a
// Function?
- if (isa<Function>(CalleeV))
- DirectCallee = CalleeV;
+ if (const auto *CalleeF = dyn_cast<Function>(CalleeV))
+ DirectCallee = CalleeF;
}
}
// Check for function alignment information if we found that the
// ultimate target is a Function
if (DirectCallee)
- if (getAlign(*cast<Function>(DirectCallee), Idx, Align))
- return Align;
+ if (getAlign(*DirectCallee, Idx, Alignment))
+ return Align(Alignment);
// Call is indirect or alignment information is not available, fall back to
// the ABI type alignment
- return DL.getABITypeAlignment(Ty);
+ return DL.getABITypeAlign(Ty);
}
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
@@ -1432,7 +1413,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool &isTailCall = CLI.IsTailCall;
ArgListTy &Args = CLI.getArgs();
Type *RetTy = CLI.RetTy;
- ImmutableCallSite CS = CLI.CS;
+ const CallBase *CB = CLI.CB;
const DataLayout &DL = DAG.getDataLayout();
bool isABI = (STI.getSmVersion() >= 20);
@@ -1465,15 +1446,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets);
- unsigned ArgAlign =
- getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
+ Align ArgAlign = getArgumentAlignment(Callee, CB, Ty, paramCount + 1, DL);
unsigned AllocSize = DL.getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
bool NeedAlign; // Does argument declaration specify alignment?
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
// declare .param .align <align> .b8 .param<n>[<size>];
SDValue DeclareParamOps[] = {
- Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
+ Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
DAG.getConstant(paramCount, dl, MVT::i32),
DAG.getConstant(AllocSize, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
@@ -1554,8 +1534,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Adjust type of the store op if we've extended the scalar
// return value.
EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j];
- unsigned EltAlign =
- NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0;
+ MaybeAlign EltAlign;
+ if (NeedAlign)
+ EltAlign = commonAlignment(ArgAlign, Offsets[j]);
Chain = DAG.getMemIntrinsicNode(
Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
@@ -1585,7 +1566,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = Outs[OIdx].Flags.getByValSize();
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
+ Align ArgAlign = Outs[OIdx].Flags.getNonZeroByValAlign();
// The ByValAlign in the Outs[OIdx].Flags is alway set at this point,
// so we don't need to worry about natural alignment or not.
// See TargetLowering::LowerCallTo().
@@ -1593,18 +1574,19 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Enforce minumum alignment of 4 to work around ptxas miscompile
// for sm_50+. See corresponding alignment adjustment in
// emitFunctionParamList() for details.
- if (ArgAlign < 4)
- ArgAlign = 4;
- SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(sz, dl, MVT::i32), InFlag};
+ if (ArgAlign < Align(4))
+ ArgAlign = Align(4);
+ SDValue DeclareParamOps[] = {
+ Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
EVT elemtype = VTs[j];
int curOffset = Offsets[j];
- unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
+ unsigned PartAlign = GreatestCommonDivisor64(ArgAlign.value(), curOffset);
auto PtrVT = getPointerTy(DL);
SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
DAG.getConstant(curOffset, dl, PtrVT));
@@ -1618,10 +1600,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(paramCount, dl, MVT::i32),
DAG.getConstant(curOffset, dl, MVT::i32),
theVal, InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
- CopyParamOps, elemtype,
- MachinePointerInfo(), /* Align */ 0,
- MachineMemOperand::MOStore);
+ Chain = DAG.getMemIntrinsicNode(
+ NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, elemtype,
+ MachinePointerInfo(), /* Align */ None, MachineMemOperand::MOStore);
InFlag = Chain.getValue(1);
}
@@ -1629,7 +1610,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
- unsigned retAlignment = 0;
+ MaybeAlign retAlignment = None;
// Handle Result
if (Ins.size() > 0) {
@@ -1657,12 +1638,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DeclareRetOps);
InFlag = Chain.getValue(1);
} else {
- retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
+ retAlignment = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
+ assert(retAlignment && "retAlignment is guaranteed to be set");
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareRetOps[] = { Chain,
- DAG.getConstant(retAlignment, dl, MVT::i32),
- DAG.getConstant(resultsz / 8, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag };
+ SDValue DeclareRetOps[] = {
+ Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32),
+ DAG.getConstant(resultsz / 8, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
DeclareRetOps);
InFlag = Chain.getValue(1);
@@ -1672,7 +1654,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Both indirect calls and libcalls have nullptr Func. In order to distinguish
// between them we must rely on the call site value which is valid for
// indirect calls but is always null for libcalls.
- bool isIndirectCall = !Func && CS;
+ bool isIndirectCall = !Func && CB;
if (isa<ExternalSymbolSDNode>(Callee)) {
Function* CalleeFunc = nullptr;
@@ -1695,7 +1677,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The prototype is embedded in a string and put as the operand for a
// CallPrototype SDNode which will print out to the value of the string.
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, CS);
+ std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, *CB);
const char *ProtoStr =
nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
SDValue ProtoOps[] = {
@@ -1768,7 +1750,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
assert(VTs.size() == Ins.size() && "Bad value decomposition");
- unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
+ Align RetAlign = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
SmallVector<EVT, 6> LoadVTs;
@@ -1784,7 +1766,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool needTruncate = false;
EVT TheLoadType = VTs[i];
EVT EltType = Ins[i].VT;
- unsigned EltAlign = GreatestCommonDivisor64(RetAlign, Offsets[i]);
+ Align EltAlign = commonAlignment(RetAlign, Offsets[i]);
if (ExtendIntegerRetVal) {
TheLoadType = MVT::i32;
EltType = MVT::i32;
@@ -2320,10 +2302,10 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
MemSDNode *MemSD = cast<MemSDNode>(N);
const DataLayout &TD = DAG.getDataLayout();
- unsigned Align = MemSD->getAlignment();
- unsigned PrefAlign =
- TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
- if (Align < PrefAlign) {
+ Align Alignment = MemSD->getAlign();
+ Align PrefAlign =
+ TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext()));
+ if (Alignment < PrefAlign) {
// This store is not sufficiently aligned, so bail out and let this vector
// store be scalarized. Note that we may still be able to emit smaller
// vector stores. For example, if we are storing a <4 x float> with an
@@ -2559,7 +2541,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
assert(VTs.size() > 0 && "Unexpected empty type.");
auto VectorInfo =
- VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty));
+ VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlign(Ty));
SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
int VecIdx = -1; // Index of the first element of the current vector.
@@ -2678,7 +2660,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
auto VectorInfo = VectorizePTXValueVTs(
- VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1);
+ VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlign(RetTy) : Align(1));
// PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
// 32-bits are sign extended or zero extended, depending on whether
@@ -2730,10 +2712,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Adjust type of load/store op if we've extended the scalar
// return value.
EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
- Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other),
- StoreOperands, TheStoreType,
- MachinePointerInfo(), /* Align */ 1,
- MachineMemOperand::MOStore);
+ Chain = DAG.getMemIntrinsicNode(
+ Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType,
+ MachinePointerInfo(), Align(1), MachineMemOperand::MOStore);
// Cleanup vector state.
StoreOperands.clear();
}
@@ -3799,8 +3780,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align =
- MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
return true;
}
@@ -3819,8 +3799,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.flags = MachineMemOperand::MOLoad;
- Info.align =
- MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
return true;
}
@@ -4810,11 +4789,10 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
LoadSDNode *LD = cast<LoadSDNode>(N);
- unsigned Align = LD->getAlignment();
+ Align Alignment = LD->getAlign();
auto &TD = DAG.getDataLayout();
- unsigned PrefAlign =
- TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
- if (Align < PrefAlign) {
+ Align PrefAlign = TD.getPrefTypeAlign(ResVT.getTypeForEVT(*DAG.getContext()));
+ if (Alignment < PrefAlign) {
// This load is not sufficiently aligned, so bail out and let this vector
// load be scalarized. Note that we may still be able to emit smaller
// vector loads. For example, if we are loading a <4 x float> with an
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 546fe49808e2d..df9cd41599628 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -491,8 +491,7 @@ public:
std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
const SmallVectorImpl<ISD::OutputArg> &,
- unsigned retAlignment,
- ImmutableCallSite CS) const;
+ MaybeAlign retAlignment, const CallBase &CB) const;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -579,8 +578,8 @@ private:
SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- unsigned getArgumentAlignment(SDValue Callee, ImmutableCallSite CS, Type *Ty,
- unsigned Idx, const DataLayout &DL) const;
+ Align getArgumentAlignment(SDValue Callee, const CallBase *CB, Type *Ty,
+ unsigned Idx, const DataLayout &DL) const;
};
} // namespace llvm
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index cff230289e600..ec0c92ccf5c52 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -69,7 +69,7 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+/// analyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target). Upon success, this returns false and returns
/// with the following information in various cases:
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 83039241a7c75..6cf59d285e8d3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -113,8 +113,8 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
/* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
/* CopyLen */ CopyLen,
- /* SrcAlign */ LI->getAlignment(),
- /* DestAlign */ SI->getAlignment(),
+ /* SrcAlign */ LI->getAlign(),
+ /* DestAlign */ SI->getAlign(),
/* SrcIsVolatile */ LI->isVolatile(),
/* DstIsVolatile */ SI->isVolatile(), TTI);
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index c3c5f6fbcba72..e60b5eeacdaee 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -159,12 +159,14 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
assert(PType && "Expecting pointer type in handleByValParam");
Type *StructType = PType->getElementType();
- unsigned AS = Func->getParent()->getDataLayout().getAllocaAddrSpace();
+ const DataLayout &DL = Func->getParent()->getDataLayout();
+ unsigned AS = DL.getAllocaAddrSpace();
AllocaInst *AllocA = new AllocaInst(StructType, AS, Arg->getName(), FirstInst);
// Set the alignment to alignment of the byval parameter. This is because,
// later load/stores assume that alignment, and we are going to replace
// the use of the byval parameter with this alloca instruction.
- AllocA->setAlignment(MaybeAlign(Func->getParamAlignment(Arg->getArgNo())));
+ AllocA->setAlignment(Func->getParamAlign(Arg->getArgNo())
+ .getValueOr(DL.getPrefTypeAlign(StructType)));
Arg->replaceAllUsesWith(AllocA);
Value *ArgInParam = new AddrSpaceCastInst(
diff --git a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index a7127b0e9a99d..ea2274f394e61 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -67,14 +67,14 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
if (MI.isDebugValue()) {
assert(i == 0 && "Frame indices can only appear as the first "
"operand of a DBG_VALUE machine instruction");
- unsigned Reg;
+ Register Reg;
int64_t Offset =
TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
MI.getOperand(0).ChangeToRegister(Reg, /*isDef=*/false);
MI.getOperand(0).setIsDebug();
auto *DIExpr = DIExpression::prepend(
MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
- MI.getOperand(3).setMetadata(DIExpr);
+ MI.getDebugExpressionOp().setMetadata(DIExpr);
continue;
}
@@ -97,22 +97,21 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
}
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
-static inline void
-AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
- bool StackGrowsDown, int64_t &Offset,
- unsigned &MaxAlign) {
+static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ Align &MaxAlign) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
Offset += MFI.getObjectSize(FrameIdx);
- unsigned Align = MFI.getObjectAlignment(FrameIdx);
+ Align Alignment = MFI.getObjectAlign(FrameIdx);
// If the alignment of this object is greater than that of the stack, then
// increase the stack alignment to match.
- MaxAlign = std::max(MaxAlign, Align);
+ MaxAlign = std::max(MaxAlign, Alignment);
// Adjust to alignment boundary.
- Offset = (Offset + Align - 1) / Align * Align;
+ Offset = alignTo(Offset, Alignment);
if (StackGrowsDown) {
LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset
@@ -169,7 +168,7 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// NOTE: We do not have a call stack
- unsigned MaxAlign = MFI.getMaxAlignment();
+ Align MaxAlign = MFI.getMaxAlign();
// No scavenger
@@ -178,10 +177,10 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// frame index registers. Functions which don't want/need this optimization
// will continue to use the existing code path.
if (MFI.getUseLocalStackAllocationBlock()) {
- unsigned Align = MFI.getLocalFrameMaxAlign().value();
+ Align Alignment = MFI.getLocalFrameMaxAlign();
// Adjust to alignment boundary.
- Offset = (Offset + Align - 1) / Align * Align;
+ Offset = alignTo(Offset, Alignment);
LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
@@ -196,7 +195,7 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Allocate the local block
Offset += MFI.getLocalFrameSize();
- MaxAlign = std::max(Align, MaxAlign);
+ MaxAlign = std::max(Alignment, MaxAlign);
}
// No stack protector
@@ -227,18 +226,16 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// ensure that the callee's frame or the alloca data is suitably aligned;
// otherwise, for leaf functions, align to the TransientStackAlignment
// value.
- unsigned StackAlign;
+ Align StackAlign;
if (MFI.adjustsStack() || MFI.hasVarSizedObjects() ||
(RegInfo->needsStackRealignment(Fn) && MFI.getObjectIndexEnd() != 0))
- StackAlign = TFI.getStackAlignment();
+ StackAlign = TFI.getStackAlign();
else
- StackAlign = TFI.getTransientStackAlignment();
+ StackAlign = TFI.getTransientStackAlign();
// If the frame pointer is eliminated, all frame offsets will be relative to
// SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+ Offset = alignTo(Offset, std::max(StackAlign, MaxAlign));
}
// Update frame info to pretend that this is part of the stack...
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index e213089e40852..8ae542130a14c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -152,7 +152,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
- std::string ParamBaseName = MF.getName();
+ std::string ParamBaseName = std::string(MF.getName());
ParamBaseName += "_param_";
assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference");
unsigned Param = atoi(Sym.data()+ParamBaseName.size());
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 357826c2d19ca..f1fa6416f15fe 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -33,13 +33,13 @@ void NVPTXSubtarget::anchor() {}
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
// Provide the default CPU if we don't have one.
- TargetName = CPU.empty() ? "sm_20" : CPU;
+ TargetName = std::string(CPU.empty() ? "sm_20" : CPU);
- ParseSubtargetFeatures(TargetName, FS);
+ ParseSubtargetFeatures(TargetName, FS);
- // Set default to PTX 3.2 (CUDA 5.5)
- if (PTXVersion == 0) {
- PTXVersion = 32;
+ // Set default to PTX 3.2 (CUDA 5.5)
+ if (PTXVersion == 0) {
+ PTXVersion = 32;
}
return *this;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 0778706d936a7..85709eb731e29 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -117,7 +117,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
getEffectiveCodeModel(CM, CodeModel::Small), OL),
is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
TLOF(std::make_unique<NVPTXTargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this) {
+ Subtarget(TT, std::string(CPU), std::string(FS), *this) {
if (TT.getOS() == Triple::NVCL)
drvInterface = NVPTX::NVCL;
else
@@ -276,8 +276,6 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
if (getOptLevel() != CodeGenOpt::None) {
addAddressSpaceInferencePasses();
- if (!DisableLoadStoreVectorizer)
- addPass(createLoadStoreVectorizerPass());
addStraightLineScalarOptimizationPasses();
}
@@ -295,8 +293,11 @@ void NVPTXPassConfig::addIRPasses() {
// %1 = shl %a, 2
//
// but EarlyCSE can do neither of them.
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None) {
addEarlyCSEOrGVNPass();
+ if (!DisableLoadStoreVectorizer)
+ addPass(createLoadStoreVectorizerPass());
+ }
}
bool NVPTXPassConfig::addInstSelector() {
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index ab2a93b759227..366d92a5a8054 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -27,7 +27,7 @@ public:
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C,
- unsigned &Align) const override {
+ Align &Alignment) const override {
return ReadOnlySection;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index afc40a7abed08..3873c73fb2e03 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -112,7 +112,8 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
}
int NVPTXTTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+ TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
@@ -123,7 +124,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost(
switch (ISD) {
default:
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info,
Opd1PropInfo, Opd2PropInfo);
case ISD::ADD:
case ISD::MUL:
@@ -136,7 +138,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost(
if (LT.second.SimpleTy == MVT::i64)
return 2 * LT.first;
// Delegate other cases to the basic TTI.
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info,
Opd1PropInfo, Opd2PropInfo);
}
}
@@ -152,3 +155,8 @@ void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Partial = UP.Runtime = true;
UP.PartialThreshold = UP.Threshold / 4;
}
+
+void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ BaseT::getPeelingPreferences(L, SE, PP);
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 864d8b91a89a5..cb832031f1add 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -50,13 +50,11 @@ public:
// Loads and stores can be vectorized if the alignment is at least as big as
// the load/store we want to vectorize.
- bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const {
return Alignment >= ChainSizeInBytes;
}
- bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const {
return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
}
@@ -87,6 +85,7 @@ public:
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -96,6 +95,10 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
// Volatile loads/stores are only supported for shared and global address
// spaces, or for generic AS that maps to them.
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 43c2e9920403e..74d129d330f30 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -226,17 +226,17 @@ bool isManaged(const Value &val) {
std::string getTextureName(const Value &val) {
assert(val.hasName() && "Found texture variable with no name");
- return val.getName();
+ return std::string(val.getName());
}
std::string getSurfaceName(const Value &val) {
assert(val.hasName() && "Found surface variable with no name");
- return val.getName();
+ return std::string(val.getName());
}
std::string getSamplerName(const Value &val) {
assert(val.hasName() && "Found sampler variable with no name");
- return val.getName();
+ return std::string(val.getName());
}
bool getMaxNTIDx(const Function &F, unsigned &x) {