diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
commit | cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch) | |
tree | 209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Target/NVPTX | |
parent | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff) |
Notes
Diffstat (limited to 'llvm/lib/Target/NVPTX')
24 files changed, 202 insertions, 215 deletions
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 7e1da9b7a94b9..aef0eed6ab9a4 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -51,4 +51,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple, // @TODO: Can we just disable this? WeakDirective = "\t// .weak\t"; GlobalDirective = "\t// .globl\t"; + + UseIntegratedAssembler = false; } diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h index ce5ca99c53970..77c4daea2b6ab 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h @@ -16,7 +16,6 @@ #include "llvm/MC/MCAsmInfo.h" namespace llvm { -class Target; class Triple; class NVPTXMCAsmInfo : public MCAsmInfo { diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h index e1691d2384e6f..b394566edd0df 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h @@ -15,11 +15,6 @@ #include <stdint.h> -namespace llvm { -class Target; - -} // End llvm namespace - // Defines symbolic names for PTX registers. #define GET_REGINFO_ENUM #include "NVPTXGenRegisterInfo.inc" diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp index 17f5ba7d900bd..cdb70ff1f9739 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp @@ -26,13 +26,13 @@ NVPTXTargetStreamer::~NVPTXTargetStreamer() = default; void NVPTXTargetStreamer::outputDwarfFileDirectives() { for (const std::string &S : DwarfFiles) - getStreamer().EmitRawText(S.data()); + getStreamer().emitRawText(S.data()); DwarfFiles.clear(); } void NVPTXTargetStreamer::closeLastSection() { if (HasSections) - getStreamer().EmitRawText("\t}"); + getStreamer().emitRawText("\t}"); } void NVPTXTargetStreamer::emitDwarfFileDirective(StringRef Directive) { @@ -128,7 +128,7 @@ void NVPTXTargetStreamer::emitRawBytes(StringRef Data) { if (Label == Directive) Label = ","; } - Streamer.EmitRawText(OS.str()); + Streamer.emitRawText(OS.str()); } #endif } diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h index 0acbace5f848f..dfe0b9cb5ee6d 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.h +++ b/llvm/lib/Target/NVPTX/NVPTX.h @@ -21,7 +21,6 @@ namespace llvm { class NVPTXTargetMachine; class FunctionPass; class MachineFunctionPass; -class formatted_raw_ostream; namespace NVPTXCC { enum CondCodes { diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td index 1d947ef1ce623..2b39e9f412f76 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -55,6 +55,8 @@ def SM72 : SubtargetFeature<"sm_72", "SmVersion", "72", "Target SM 7.2">; def SM75 : SubtargetFeature<"sm_75", "SmVersion", "75", "Target SM 7.5">; +def SM80 : SubtargetFeature<"sm_80", "SmVersion", "80", + "Target SM 8.0">; // PTX Versions def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32", @@ -77,6 +79,10 @@ def PTX63 : SubtargetFeature<"ptx63", "PTXVersion", "63", "Use PTX version 6.3">; def PTX64 : SubtargetFeature<"ptx64", "PTXVersion", "64", "Use PTX version 6.4">; +def PTX65 : SubtargetFeature<"ptx65", "PTXVersion", "65", + "Use PTX version 6.5">; +def PTX70 : SubtargetFeature<"ptx70", "PTXVersion", "70", + "Use PTX version 7.0">; //===----------------------------------------------------------------------===// // NVPTX supported processors. @@ -100,6 +106,7 @@ def : Proc<"sm_62", [SM62, PTX50]>; def : Proc<"sm_70", [SM70, PTX60]>; def : Proc<"sm_72", [SM72, PTX61]>; def : Proc<"sm_75", [SM75, PTX63]>; +def : Proc<"sm_80", [SM80, PTX70]>; def NVPTXInstrInfo : InstrInfo { } diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 7117438dc503f..da1a398a68f0d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -141,7 +141,7 @@ VisitGlobalVariableForEmission(const GlobalVariable *GV, Visiting.erase(GV); } -void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { +void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) { MCInst Inst; lowerToMCInst(MI, Inst); EmitToStreamer(*OutStreamer, Inst); @@ -434,13 +434,13 @@ bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( return false; } -void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { - AsmPrinter::EmitBasicBlockStart(MBB); +void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { + AsmPrinter::emitBasicBlockStart(MBB); if (isLoopHeaderOfNoUnroll(MBB)) - OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n")); + OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n")); } -void NVPTXAsmPrinter::EmitFunctionEntryLabel() { +void NVPTXAsmPrinter::emitFunctionEntryLabel() { SmallString<128> Str; raw_svector_ostream O(Str); @@ -467,11 +467,11 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { if (isKernelFunction(*F)) emitKernelFunctionDirectives(*F, O); - OutStreamer->EmitRawText(O.str()); + OutStreamer->emitRawText(O.str()); VRegMapping.clear(); // Emit open brace for function body. - OutStreamer->EmitRawText(StringRef("{\n")); + OutStreamer->emitRawText(StringRef("{\n")); setAndEmitFunctionVirtualRegisters(*MF); // Emit initial .loc debug directive for correct relocation symbol data. if (MMI && MMI->hasDebugInfo()) @@ -485,18 +485,18 @@ bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) { // debug labels/data after the last basic block. // We need to emit the closing brace here because we don't have function that // finished emission of the function body. - OutStreamer->EmitRawText(StringRef("}\n")); + OutStreamer->emitRawText(StringRef("}\n")); return Result; } -void NVPTXAsmPrinter::EmitFunctionBodyStart() { +void NVPTXAsmPrinter::emitFunctionBodyStart() { SmallString<128> Str; raw_svector_ostream O(Str); emitDemotedVars(&MF->getFunction(), O); - OutStreamer->EmitRawText(O.str()); + OutStreamer->emitRawText(O.str()); } -void NVPTXAsmPrinter::EmitFunctionBodyEnd() { +void NVPTXAsmPrinter::emitFunctionBodyEnd() { VRegMapping.clear(); } @@ -762,13 +762,21 @@ static bool isEmptyXXStructor(GlobalVariable *GV) { return InitList->getNumOperands() == 0; } -bool NVPTXAsmPrinter::doInitialization(Module &M) { +void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) { // Construct a default subtarget off of the TargetMachine defaults. The // rest of NVPTX isn't friendly to change subtargets per function and // so the default TargetMachine will have all of the options. const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl()); + SmallString<128> Str1; + raw_svector_ostream OS1(Str1); + + // Emit header before any dwarf directives are emitted below. + emitHeader(M, OS1, *STI); + OutStreamer->emitRawText(OS1.str()); +} +bool NVPTXAsmPrinter::doInitialization(Module &M) { if (M.alias_size()) { report_fatal_error("Module has aliases, which NVPTX does not support."); return true; // error @@ -784,26 +792,9 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { return true; // error } - SmallString<128> Str1; - raw_svector_ostream OS1(Str1); - // We need to call the parent's one explicitly. bool Result = AsmPrinter::doInitialization(M); - // Emit header before any dwarf directives are emitted below. - emitHeader(M, OS1, *STI); - OutStreamer->EmitRawText(OS1.str()); - - // Emit module-level inline asm if it exists. - if (!M.getModuleInlineAsm().empty()) { - OutStreamer->AddComment("Start of file scope inline assembly"); - OutStreamer->AddBlankLine(); - OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm())); - OutStreamer->AddBlankLine(); - OutStreamer->AddComment("End of file scope inline assembly"); - OutStreamer->AddBlankLine(); - } - GlobalsEmitted = false; return Result; @@ -838,7 +829,7 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) { OS2 << '\n'; - OutStreamer->EmitRawText(OS2.str()); + OutStreamer->emitRawText(OS2.str()); } void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O, @@ -929,7 +920,7 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer()) ->closeLastSection(); // Emit empty .debug_loc section for better support of the empty files. - OutStreamer->EmitRawText("\t.section\t.debug_loc\t{\t}"); + OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}"); } // Output last DWARF .file directives, if any. @@ -982,7 +973,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, msg.append("Error: "); msg.append("Symbol "); if (V->hasName()) - msg.append(V->getName()); + msg.append(std::string(V->getName())); msg.append("has unsupported appending linkage type"); llvm_unreachable(msg.c_str()); } else if (!V->hasInternalLinkage() && @@ -1184,7 +1175,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, case Type::IntegerTyID: // Integers larger than 64 bits case Type::StructTyID: case Type::ArrayTyID: - case Type::VectorTyID: + case Type::FixedVectorTyID: ElementSize = DL.getTypeStoreSize(ETy); // Ptx allows variable initilization only for constant and // global state spaces. @@ -1358,7 +1349,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, switch (ETy->getTypeID()) { case Type::StructTyID: case Type::ArrayTyID: - case Type::VectorTyID: + case Type::FixedVectorTyID: ElementSize = DL.getTypeStoreSize(ETy); O << " .b8 "; getSymbol(GVar)->print(O, MAI); @@ -1439,7 +1430,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { if (isKernelFunction(*F)) { if (isSampler(*I) || isImage(*I)) { if (isImage(*I)) { - std::string sname = I->getName(); + std::string sname = std::string(I->getName()); if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { if (hasImageHandles) O << "\t.param .u64 .ptr .surfref "; @@ -1634,8 +1625,8 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( const MachineFrameInfo &MFI = MF.getFrameInfo(); int NumBytes = (int) MFI.getStackSize(); if (NumBytes) { - O << "\t.local .align " << MFI.getMaxAlignment() << " .b8 \t" << DEPOTNAME - << getFunctionNumber() << "[" << NumBytes << "];\n"; + O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t" + << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n"; if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) { O << "\t.reg .b64 \t%SP;\n"; O << "\t.reg .b64 \t%SPL;\n"; @@ -1684,7 +1675,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( } } - OutStreamer->EmitRawText(O.str()); + OutStreamer->emitRawText(O.str()); } void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { @@ -1815,7 +1806,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, aggBuffer->addBytes(ptr, 4, Bytes); break; } else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - if (const auto *constInt = dyn_cast_or_null<ConstantInt>( + if (const auto *constInt = dyn_cast<ConstantInt>( ConstantFoldConstant(Cexpr, DL))) { int int32 = (int)(constInt->getZExtValue()); ConvertIntToBytes<>(ptr, int32); @@ -1837,7 +1828,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, aggBuffer->addBytes(ptr, 8, Bytes); break; } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - if (const auto *constInt = dyn_cast_or_null<ConstantInt>( + if (const auto *constInt = dyn_cast<ConstantInt>( ConstantFoldConstant(Cexpr, DL))) { long long int64 = (long long)(constInt->getZExtValue()); ConvertIntToBytes<>(ptr, int64); @@ -1892,7 +1883,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, } case Type::ArrayTyID: - case Type::VectorTyID: + case Type::FixedVectorTyID: case Type::StructTyID: { if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) { int ElementSize = DL.getTypeAllocSize(CPV->getType()); @@ -1993,23 +1984,22 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) } switch (CE->getOpcode()) { - default: + default: { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = ConstantFoldConstant(CE, getDataLayout())) - if (C && C != CE) - return lowerConstantForGV(C, ProcessingGeneric); + Constant *C = ConstantFoldConstant(CE, getDataLayout()); + if (C != CE) + return lowerConstantForGV(C, ProcessingGeneric); // Otherwise report the problem to the user. - { - std::string S; - raw_string_ostream OS(S); - OS << "Unsupported expression in static initializer: "; - CE->printAsOperand(OS, /*PrintType=*/false, - !MF ? nullptr : MF->getFunction().getParent()); - report_fatal_error(OS.str()); - } + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + CE->printAsOperand(OS, /*PrintType=*/false, + !MF ? nullptr : MF->getFunction().getParent()); + report_fatal_error(OS.str()); + } case Instruction::AddrSpaceCast: { // Strip the addrspacecast and pass along the operand diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index 7a66854d32f4b..5c3a4eb470c1c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -32,7 +32,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/PassAnalysisSupport.h" +#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" @@ -200,13 +200,14 @@ private: const Function *F; std::string CurrentFnName; - void EmitBasicBlockStart(const MachineBasicBlock &MBB) override; - void EmitFunctionEntryLabel() override; - void EmitFunctionBodyStart() override; - void EmitFunctionBodyEnd() override; + void emitStartOfAsmFile(Module &M) override; + void emitBasicBlockStart(const MachineBasicBlock &MBB) override; + void emitFunctionEntryLabel() override; + void emitFunctionBodyStart() override; + void emitFunctionBodyEnd() override; void emitImplicitDef(const MachineInstr *MI) const override; - void EmitInstruction(const MachineInstr *) override; + void emitInstruction(const MachineInstr *) override; void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI); bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); MCOperand GetSymbolRef(const MCSymbol *Symbol); diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp index d26912f47e501..c533921842e48 100644 --- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -65,7 +65,7 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF, int NVPTXFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const { + Register &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); FrameReg = NVPTX::VRDepot; return MFI.getObjectOffset(FI) - getOffsetOfLocalArea(); @@ -83,3 +83,8 @@ MachineBasicBlock::iterator NVPTXFrameLowering::eliminateCallFramePseudoInstr( // ADJCALLSTACKUP instructions. return MBB.erase(I); } + +TargetFrameLowering::DwarfFrameBase +NVPTXFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const { + return {DwarfFrameBase::CFA, {0}}; +} diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h index 40269f58f06e7..e4c2b9e77f709 100644 --- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -16,7 +16,7 @@ #include "llvm/CodeGen/TargetFrameLowering.h" namespace llvm { -class NVPTXSubtarget; + class NVPTXFrameLowering : public TargetFrameLowering { public: explicit NVPTXFrameLowering(); @@ -25,11 +25,12 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; int getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const override; + Register &FrameReg) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; + DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override; }; } // End llvm namespace diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index b36d9b2e240a3..9078ff8cfb975 100644 --- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -144,7 +144,7 @@ bool GenericToNVVM::runOnModule(Module &M) { // variable initializers, as other uses have been already been removed // while walking through the instructions in function definitions. GV->replaceAllUsesWith(BitCastNewGV); - std::string Name = GV->getName(); + std::string Name = std::string(GV->getName()); GV->eraseFromParent(); NewGV->setName(Name); } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 92f71c687c461..f45cc06e0a0a3 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -31,7 +31,6 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -88,11 +87,6 @@ static cl::opt<bool> UsePrecSqrtF32( cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), cl::init(true)); -static cl::opt<bool> FtzEnabled( - "nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, - cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), - cl::init(false)); - int NVPTXTargetLowering::getDivF32Level() const { if (UsePrecDivF32.getNumOccurrences() > 0) { // If nvptx-prec-div32=N is used on the command-line, always honor it @@ -117,18 +111,8 @@ bool NVPTXTargetLowering::usePrecSqrtF32() const { } bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const { - // TODO: Get rid of this flag; there can be only one way to do this. - if (FtzEnabled.getNumOccurrences() > 0) { - // If nvptx-f32ftz is used on the command-line, always honor it - return FtzEnabled; - } else { - const Function &F = MF.getFunction(); - // Otherwise, check for an nvptx-f32ftz attribute on the function - if (F.hasFnAttribute("nvptx-f32ftz")) - return F.getFnAttribute("nvptx-f32ftz").getValueAsString() == "true"; - else - return false; - } + return MF.getDenormalMode(APFloat::IEEEsingle()).Output == + DenormalMode::PreserveSign; } static bool IsPTXVectorType(MVT VT) { @@ -233,11 +217,10 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, // covered by the vector op. Otherwise, it returns 1. static unsigned CanMergeParamLoadStoresStartingAt( unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs, - const SmallVectorImpl<uint64_t> &Offsets, unsigned ParamAlignment) { - assert(isPowerOf2_32(AccessSize) && "must be a power of 2!"); + const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) { // Can't vectorize if param alignment is not sufficient. - if (AccessSize > ParamAlignment) + if (ParamAlignment < AccessSize) return 1; // Can't vectorize if offset is not aligned. if (Offsets[Idx] & (AccessSize - 1)) @@ -297,7 +280,7 @@ enum ParamVectorizationFlags { static SmallVector<ParamVectorizationFlags, 16> VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs, const SmallVectorImpl<uint64_t> &Offsets, - unsigned ParamAlignment) { + Align ParamAlignment) { // Set vector size to match ValueVTs and mark all elements as // scalars by default. SmallVector<ParamVectorizationFlags, 16> VectorInfo; @@ -1258,8 +1241,8 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { std::string NVPTXTargetLowering::getPrototype( const DataLayout &DL, Type *retTy, const ArgListTy &Args, - const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment, - ImmutableCallSite CS) const { + const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment, + const CallBase &CB) const { auto PtrVT = getPointerTy(DL); bool isABI = (STI.getSmVersion() >= 20); @@ -1294,8 +1277,8 @@ std::string NVPTXTargetLowering::getPrototype( O << ".param .b" << PtrVT.getSizeInBits() << " _"; } else if (retTy->isAggregateType() || retTy->isVectorTy() || retTy->isIntegerTy(128)) { - O << ".param .align " << retAlignment << " .b8 _[" - << DL.getTypeAllocSize(retTy) << "]"; + O << ".param .align " << (retAlignment ? retAlignment->value() : 0) + << " .b8 _[" << DL.getTypeAllocSize(retTy) << "]"; } else { llvm_unreachable("Unknown return type"); } @@ -1316,7 +1299,7 @@ std::string NVPTXTargetLowering::getPrototype( if (!Outs[OIdx].Flags.isByVal()) { if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { unsigned align = 0; - const CallInst *CallI = cast<CallInst>(CS.getInstruction()); + const CallInst *CallI = cast<CallInst>(&CB); // +1 because index 0 is reserved for return type alignment if (!getAlign(*CallI, i + 1, align)) align = DL.getABITypeAlignment(Ty); @@ -1358,9 +1341,9 @@ std::string NVPTXTargetLowering::getPrototype( assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); - unsigned align = Outs[OIdx].Flags.getByValAlign(); + Align align = Outs[OIdx].Flags.getNonZeroByValAlign(); unsigned sz = DL.getTypeAllocSize(ETy); - O << ".param .align " << align << " .b8 "; + O << ".param .align " << align.value() << " .b8 "; O << "_"; O << "[" << sz << "]"; } @@ -1368,31 +1351,29 @@ std::string NVPTXTargetLowering::getPrototype( return O.str(); } -unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, - ImmutableCallSite CS, - Type *Ty, unsigned Idx, - const DataLayout &DL) const { - if (!CS) { +Align NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, + const CallBase *CB, Type *Ty, + unsigned Idx, + const DataLayout &DL) const { + if (!CB) { // CallSite is zero, fallback to ABI type alignment - return DL.getABITypeAlignment(Ty); + return DL.getABITypeAlign(Ty); } - unsigned Align = 0; - const Value *DirectCallee = CS.getCalledFunction(); + unsigned Alignment = 0; + const Function *DirectCallee = CB->getCalledFunction(); if (!DirectCallee) { // We don't have a direct function symbol, but that may be because of // constant cast instructions in the call. - const Instruction *CalleeI = CS.getInstruction(); - assert(CalleeI && "Call target is not a function or derived value?"); // With bitcast'd call targets, the instruction will be the call - if (isa<CallInst>(CalleeI)) { + if (const auto *CI = dyn_cast<CallInst>(CB)) { // Check if we have call alignment metadata - if (getAlign(*cast<CallInst>(CalleeI), Idx, Align)) - return Align; + if (getAlign(*CI, Idx, Alignment)) + return Align(Alignment); - const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue(); + const Value *CalleeV = CI->getCalledOperand(); // Ignore any bitcast instructions while (isa<ConstantExpr>(CalleeV)) { const ConstantExpr *CE = cast<ConstantExpr>(CalleeV); @@ -1404,20 +1385,20 @@ unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, // We have now looked past all of the bitcasts. Do we finally have a // Function? - if (isa<Function>(CalleeV)) - DirectCallee = CalleeV; + if (const auto *CalleeF = dyn_cast<Function>(CalleeV)) + DirectCallee = CalleeF; } } // Check for function alignment information if we found that the // ultimate target is a Function if (DirectCallee) - if (getAlign(*cast<Function>(DirectCallee), Idx, Align)) - return Align; + if (getAlign(*DirectCallee, Idx, Alignment)) + return Align(Alignment); // Call is indirect or alignment information is not available, fall back to // the ABI type alignment - return DL.getABITypeAlignment(Ty); + return DL.getABITypeAlign(Ty); } SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, @@ -1432,7 +1413,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool &isTailCall = CLI.IsTailCall; ArgListTy &Args = CLI.getArgs(); Type *RetTy = CLI.RetTy; - ImmutableCallSite CS = CLI.CS; + const CallBase *CB = CLI.CB; const DataLayout &DL = DAG.getDataLayout(); bool isABI = (STI.getSmVersion() >= 20); @@ -1465,15 +1446,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<EVT, 16> VTs; SmallVector<uint64_t, 16> Offsets; ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets); - unsigned ArgAlign = - getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL); + Align ArgAlign = getArgumentAlignment(Callee, CB, Ty, paramCount + 1, DL); unsigned AllocSize = DL.getTypeAllocSize(Ty); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); bool NeedAlign; // Does argument declaration specify alignment? if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { // declare .param .align <align> .b8 .param<n>[<size>]; SDValue DeclareParamOps[] = { - Chain, DAG.getConstant(ArgAlign, dl, MVT::i32), + Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32), DAG.getConstant(paramCount, dl, MVT::i32), DAG.getConstant(AllocSize, dl, MVT::i32), InFlag}; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, @@ -1554,8 +1534,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Adjust type of the store op if we've extended the scalar // return value. EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j]; - unsigned EltAlign = - NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0; + MaybeAlign EltAlign; + if (NeedAlign) + EltAlign = commonAlignment(ArgAlign, Offsets[j]); Chain = DAG.getMemIntrinsicNode( Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands, @@ -1585,7 +1566,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // declare .param .align <align> .b8 .param<n>[<size>]; unsigned sz = Outs[OIdx].Flags.getByValSize(); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign(); + Align ArgAlign = Outs[OIdx].Flags.getNonZeroByValAlign(); // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, // so we don't need to worry about natural alignment or not. // See TargetLowering::LowerCallTo(). @@ -1593,18 +1574,19 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Enforce minumum alignment of 4 to work around ptxas miscompile // for sm_50+. See corresponding alignment adjustment in // emitFunctionParamList() for details. - if (ArgAlign < 4) - ArgAlign = 4; - SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32), - DAG.getConstant(paramCount, dl, MVT::i32), - DAG.getConstant(sz, dl, MVT::i32), InFlag}; + if (ArgAlign < Align(4)) + ArgAlign = Align(4); + SDValue DeclareParamOps[] = { + Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32), + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(sz, dl, MVT::i32), InFlag}; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, DeclareParamOps); InFlag = Chain.getValue(1); for (unsigned j = 0, je = VTs.size(); j != je; ++j) { EVT elemtype = VTs[j]; int curOffset = Offsets[j]; - unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); + unsigned PartAlign = GreatestCommonDivisor64(ArgAlign.value(), curOffset); auto PtrVT = getPointerTy(DL); SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx], DAG.getConstant(curOffset, dl, PtrVT)); @@ -1618,10 +1600,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(paramCount, dl, MVT::i32), DAG.getConstant(curOffset, dl, MVT::i32), theVal, InFlag }; - Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, - CopyParamOps, elemtype, - MachinePointerInfo(), /* Align */ 0, - MachineMemOperand::MOStore); + Chain = DAG.getMemIntrinsicNode( + NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, elemtype, + MachinePointerInfo(), /* Align */ None, MachineMemOperand::MOStore); InFlag = Chain.getValue(1); } @@ -1629,7 +1610,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode()); - unsigned retAlignment = 0; + MaybeAlign retAlignment = None; // Handle Result if (Ins.size() > 0) { @@ -1657,12 +1638,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DeclareRetOps); InFlag = Chain.getValue(1); } else { - retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL); + retAlignment = getArgumentAlignment(Callee, CB, RetTy, 0, DL); + assert(retAlignment && "retAlignment is guaranteed to be set"); SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, - DAG.getConstant(retAlignment, dl, MVT::i32), - DAG.getConstant(resultsz / 8, dl, MVT::i32), - DAG.getConstant(0, dl, MVT::i32), InFlag }; + SDValue DeclareRetOps[] = { + Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32), + DAG.getConstant(resultsz / 8, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), InFlag}; Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, DeclareRetOps); InFlag = Chain.getValue(1); @@ -1672,7 +1654,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Both indirect calls and libcalls have nullptr Func. In order to distinguish // between them we must rely on the call site value which is valid for // indirect calls but is always null for libcalls. - bool isIndirectCall = !Func && CS; + bool isIndirectCall = !Func && CB; if (isa<ExternalSymbolSDNode>(Callee)) { Function* CalleeFunc = nullptr; @@ -1695,7 +1677,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // The prototype is embedded in a string and put as the operand for a // CallPrototype SDNode which will print out to the value of the string. SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); - std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, CS); + std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, *CB); const char *ProtoStr = nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); SDValue ProtoOps[] = { @@ -1768,7 +1750,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0); assert(VTs.size() == Ins.size() && "Bad value decomposition"); - unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL); + Align RetAlign = getArgumentAlignment(Callee, CB, RetTy, 0, DL); auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign); SmallVector<EVT, 6> LoadVTs; @@ -1784,7 +1766,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool needTruncate = false; EVT TheLoadType = VTs[i]; EVT EltType = Ins[i].VT; - unsigned EltAlign = GreatestCommonDivisor64(RetAlign, Offsets[i]); + Align EltAlign = commonAlignment(RetAlign, Offsets[i]); if (ExtendIntegerRetVal) { TheLoadType = MVT::i32; EltType = MVT::i32; @@ -2320,10 +2302,10 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { MemSDNode *MemSD = cast<MemSDNode>(N); const DataLayout &TD = DAG.getDataLayout(); - unsigned Align = MemSD->getAlignment(); - unsigned PrefAlign = - TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); - if (Align < PrefAlign) { + Align Alignment = MemSD->getAlign(); + Align PrefAlign = + TD.getPrefTypeAlign(ValVT.getTypeForEVT(*DAG.getContext())); + if (Alignment < PrefAlign) { // This store is not sufficiently aligned, so bail out and let this vector // store be scalarized. Note that we may still be able to emit smaller // vector stores. For example, if we are storing a <4 x float> with an @@ -2559,7 +2541,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0); assert(VTs.size() > 0 && "Unexpected empty type."); auto VectorInfo = - VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty)); + VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlign(Ty)); SDValue Arg = getParamSymbol(DAG, idx, PtrVT); int VecIdx = -1; // Index of the first element of the current vector. @@ -2678,7 +2660,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(VTs.size() == OutVals.size() && "Bad return value decomposition"); auto VectorInfo = VectorizePTXValueVTs( - VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1); + VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlign(RetTy) : Align(1)); // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than // 32-bits are sign extended or zero extended, depending on whether @@ -2730,10 +2712,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // Adjust type of load/store op if we've extended the scalar // return value. EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i]; - Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other), - StoreOperands, TheStoreType, - MachinePointerInfo(), /* Align */ 1, - MachineMemOperand::MOStore); + Chain = DAG.getMemIntrinsicNode( + Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType, + MachinePointerInfo(), Align(1), MachineMemOperand::MOStore); // Cleanup vector state. StoreOperands.clear(); } @@ -3799,8 +3780,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = - MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue()); + Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); return true; } @@ -3819,8 +3799,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.flags = MachineMemOperand::MOLoad; - Info.align = - MaybeAlign(cast<ConstantInt>(I.getArgOperand(1))->getZExtValue()); + Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); return true; } @@ -4810,11 +4789,10 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, LoadSDNode *LD = cast<LoadSDNode>(N); - unsigned Align = LD->getAlignment(); + Align Alignment = LD->getAlign(); auto &TD = DAG.getDataLayout(); - unsigned PrefAlign = - TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); - if (Align < PrefAlign) { + Align PrefAlign = TD.getPrefTypeAlign(ResVT.getTypeForEVT(*DAG.getContext())); + if (Alignment < PrefAlign) { // This load is not sufficiently aligned, so bail out and let this vector // load be scalarized. Note that we may still be able to emit smaller // vector loads. For example, if we are loading a <4 x float> with an diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 546fe49808e2d..df9cd41599628 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -491,8 +491,7 @@ public: std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, const SmallVectorImpl<ISD::OutputArg> &, - unsigned retAlignment, - ImmutableCallSite CS) const; + MaybeAlign retAlignment, const CallBase &CB) const; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, @@ -579,8 +578,8 @@ private: SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - unsigned getArgumentAlignment(SDValue Callee, ImmutableCallSite CS, Type *Ty, - unsigned Idx, const DataLayout &DL) const; + Align getArgumentAlignment(SDValue Callee, const CallBase *CB, Type *Ty, + unsigned Idx, const DataLayout &DL) const; }; } // namespace llvm diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index cff230289e600..ec0c92ccf5c52 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -69,7 +69,7 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); } -/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning +/// analyzeBranch - Analyze the branching code at the end of MBB, returning /// true if it cannot be understood (e.g. it's a switch dispatch or isn't /// implemented for a target). Upon success, this returns false and returns /// with the following information in various cases: diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 83039241a7c75..6cf59d285e8d3 100644 --- a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -113,8 +113,8 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { createMemCpyLoopKnownSize(/* ConvertedInst */ SI, /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, /* CopyLen */ CopyLen, - /* SrcAlign */ LI->getAlignment(), - /* DestAlign */ SI->getAlignment(), + /* SrcAlign */ LI->getAlign(), + /* DestAlign */ SI->getAlign(), /* SrcIsVolatile */ LI->isVolatile(), /* DstIsVolatile */ SI->isVolatile(), TTI); diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp index c3c5f6fbcba72..e60b5eeacdaee 100644 --- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -159,12 +159,14 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) { assert(PType && "Expecting pointer type in handleByValParam"); Type *StructType = PType->getElementType(); - unsigned AS = Func->getParent()->getDataLayout().getAllocaAddrSpace(); + const DataLayout &DL = Func->getParent()->getDataLayout(); + unsigned AS = DL.getAllocaAddrSpace(); AllocaInst *AllocA = new AllocaInst(StructType, AS, Arg->getName(), FirstInst); // Set the alignment to alignment of the byval parameter. This is because, // later load/stores assume that alignment, and we are going to replace // the use of the byval parameter with this alloca instruction. - AllocA->setAlignment(MaybeAlign(Func->getParamAlignment(Arg->getArgNo()))); + AllocA->setAlignment(Func->getParamAlign(Arg->getArgNo()) + .getValueOr(DL.getPrefTypeAlign(StructType))); Arg->replaceAllUsesWith(AllocA); Value *ArgInParam = new AddrSpaceCastInst( diff --git a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index a7127b0e9a99d..ea2274f394e61 100644 --- a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -67,14 +67,14 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) { if (MI.isDebugValue()) { assert(i == 0 && "Frame indices can only appear as the first " "operand of a DBG_VALUE machine instruction"); - unsigned Reg; + Register Reg; int64_t Offset = TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg); MI.getOperand(0).ChangeToRegister(Reg, /*isDef=*/false); MI.getOperand(0).setIsDebug(); auto *DIExpr = DIExpression::prepend( MI.getDebugExpression(), DIExpression::ApplyOffset, Offset); - MI.getOperand(3).setMetadata(DIExpr); + MI.getDebugExpressionOp().setMetadata(DIExpr); continue; } @@ -97,22 +97,21 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) { } /// AdjustStackOffset - Helper function used to adjust the stack frame offset. -static inline void -AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, - bool StackGrowsDown, int64_t &Offset, - unsigned &MaxAlign) { +static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, + bool StackGrowsDown, int64_t &Offset, + Align &MaxAlign) { // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) Offset += MFI.getObjectSize(FrameIdx); - unsigned Align = MFI.getObjectAlignment(FrameIdx); + Align Alignment = MFI.getObjectAlign(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); + MaxAlign = std::max(MaxAlign, Alignment); // Adjust to alignment boundary. - Offset = (Offset + Align - 1) / Align * Align; + Offset = alignTo(Offset, Alignment); if (StackGrowsDown) { LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset @@ -169,7 +168,7 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // NOTE: We do not have a call stack - unsigned MaxAlign = MFI.getMaxAlignment(); + Align MaxAlign = MFI.getMaxAlign(); // No scavenger @@ -178,10 +177,10 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // frame index registers. Functions which don't want/need this optimization // will continue to use the existing code path. if (MFI.getUseLocalStackAllocationBlock()) { - unsigned Align = MFI.getLocalFrameMaxAlign().value(); + Align Alignment = MFI.getLocalFrameMaxAlign(); // Adjust to alignment boundary. - Offset = (Offset + Align - 1) / Align * Align; + Offset = alignTo(Offset, Alignment); LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); @@ -196,7 +195,7 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Allocate the local block Offset += MFI.getLocalFrameSize(); - MaxAlign = std::max(Align, MaxAlign); + MaxAlign = std::max(Alignment, MaxAlign); } // No stack protector @@ -227,18 +226,16 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // ensure that the callee's frame or the alloca data is suitably aligned; // otherwise, for leaf functions, align to the TransientStackAlignment // value. - unsigned StackAlign; + Align StackAlign; if (MFI.adjustsStack() || MFI.hasVarSizedObjects() || (RegInfo->needsStackRealignment(Fn) && MFI.getObjectIndexEnd() != 0)) - StackAlign = TFI.getStackAlignment(); + StackAlign = TFI.getStackAlign(); else - StackAlign = TFI.getTransientStackAlignment(); + StackAlign = TFI.getTransientStackAlign(); // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + Offset = alignTo(Offset, std::max(StackAlign, MaxAlign)); } // Update frame info to pretend that this is part of the stack... diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index e213089e40852..8ae542130a14c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -152,7 +152,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) { assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!"); StringRef Sym = TexHandleDef.getOperand(6).getSymbolName(); - std::string ParamBaseName = MF.getName(); + std::string ParamBaseName = std::string(MF.getName()); ParamBaseName += "_param_"; assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference"); unsigned Param = atoi(Sym.data()+ParamBaseName.size()); diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index 357826c2d19ca..f1fa6416f15fe 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -33,13 +33,13 @@ void NVPTXSubtarget::anchor() {} NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { // Provide the default CPU if we don't have one. - TargetName = CPU.empty() ? "sm_20" : CPU; + TargetName = std::string(CPU.empty() ? "sm_20" : CPU); - ParseSubtargetFeatures(TargetName, FS); + ParseSubtargetFeatures(TargetName, FS); - // Set default to PTX 3.2 (CUDA 5.5) - if (PTXVersion == 0) { - PTXVersion = 32; + // Set default to PTX 3.2 (CUDA 5.5) + if (PTXVersion == 0) { + PTXVersion = 32; } return *this; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 0778706d936a7..85709eb731e29 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -117,7 +117,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, getEffectiveCodeModel(CM, CodeModel::Small), OL), is64bit(is64bit), UseShortPointers(UseShortPointersOpt), TLOF(std::make_unique<NVPTXTargetObjectFile>()), - Subtarget(TT, CPU, FS, *this) { + Subtarget(TT, std::string(CPU), std::string(FS), *this) { if (TT.getOS() == Triple::NVCL) drvInterface = NVPTX::NVCL; else @@ -276,8 +276,6 @@ void NVPTXPassConfig::addIRPasses() { addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine())); if (getOptLevel() != CodeGenOpt::None) { addAddressSpaceInferencePasses(); - if (!DisableLoadStoreVectorizer) - addPass(createLoadStoreVectorizerPass()); addStraightLineScalarOptimizationPasses(); } @@ -295,8 +293,11 @@ void NVPTXPassConfig::addIRPasses() { // %1 = shl %a, 2 // // but EarlyCSE can do neither of them. - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { addEarlyCSEOrGVNPass(); + if (!DisableLoadStoreVectorizer) + addPass(createLoadStoreVectorizerPass()); + } } bool NVPTXPassConfig::addInstSelector() { diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h index ab2a93b759227..366d92a5a8054 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -27,7 +27,7 @@ public: MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C, - unsigned &Align) const override { + Align &Alignment) const override { return ReadOnlySection; } diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index afc40a7abed08..3873c73fb2e03 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -112,7 +112,8 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { } int NVPTXTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, const Instruction *CxtI) { @@ -123,7 +124,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost( switch (ISD) { default: - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); case ISD::ADD: case ISD::MUL: @@ -136,7 +138,8 @@ int NVPTXTTIImpl::getArithmeticInstrCost( if (LT.second.SimpleTy == MVT::i64) return 2 * LT.first; // Delegate other cases to the basic TTI. - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); } } @@ -152,3 +155,8 @@ void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, UP.Partial = UP.Runtime = true; UP.PartialThreshold = UP.Threshold / 4; } + +void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP) { + BaseT::getPeelingPreferences(L, SE, PP); +} diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 864d8b91a89a5..cb832031f1add 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -50,13 +50,11 @@ public: // Loads and stores can be vectorized if the alignment is at least as big as // the load/store we want to vectorize. - bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, - unsigned Alignment, + bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const { return Alignment >= ChainSizeInBytes; } - bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, - unsigned Alignment, + bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const { return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace); } @@ -87,6 +85,7 @@ public: int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -96,6 +95,10 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { // Volatile loads/stores are only supported for shared and global address // spaces, or for generic AS that maps to them. diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index 43c2e9920403e..74d129d330f30 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -226,17 +226,17 @@ bool isManaged(const Value &val) { std::string getTextureName(const Value &val) { assert(val.hasName() && "Found texture variable with no name"); - return val.getName(); + return std::string(val.getName()); } std::string getSurfaceName(const Value &val) { assert(val.hasName() && "Found surface variable with no name"); - return val.getName(); + return std::string(val.getName()); } std::string getSamplerName(const Value &val) { assert(val.hasName() && "Found sampler variable with no name"); - return val.getName(); + return std::string(val.getName()); } bool getMaxNTIDx(const Function &F, unsigned &x) { |