diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-01-18 16:17:27 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-01-18 16:17:27 +0000 |
commit | 67c32a98315f785a9ec9d531c1f571a0196c7463 (patch) | |
tree | 4abb9cbeecc7901726dd0b4a37369596c852e9ef /lib/Target/NVPTX | |
parent | 9f61947910e6ab40de38e6b4034751ef1513200f (diff) |
Diffstat (limited to 'lib/Target/NVPTX')
40 files changed, 507 insertions, 479 deletions
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 4e35b1811295..3a4a19dc3991 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -9,26 +9,28 @@ tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(NVPTXCommonTableGen) set(NVPTXCodeGen_sources + NVPTXAllocaHoisting.cpp + NVPTXAsmPrinter.cpp + NVPTXAssignValidGlobalNames.cpp NVPTXFavorNonGenericAddrSpaces.cpp NVPTXFrameLowering.cpp - NVPTXInstrInfo.cpp + NVPTXGenericToNVVM.cpp NVPTXISelDAGToDAG.cpp NVPTXISelLowering.cpp + NVPTXImageOptimizer.cpp + NVPTXInstrInfo.cpp + NVPTXLowerAggrCopies.cpp + NVPTXLowerStructArgs.cpp + NVPTXMCExpr.cpp + NVPTXPrologEpilogPass.cpp NVPTXRegisterInfo.cpp + NVPTXReplaceImageHandles.cpp NVPTXSubtarget.cpp NVPTXTargetMachine.cpp - NVPTXLowerAggrCopies.cpp - NVPTXutil.cpp - NVPTXAllocaHoisting.cpp - NVPTXAsmPrinter.cpp + NVPTXTargetTransformInfo.cpp NVPTXUtilities.cpp + NVPTXutil.cpp NVVMReflect.cpp - NVPTXGenericToNVVM.cpp - NVPTXAssignValidGlobalNames.cpp - NVPTXPrologEpilogPass.cpp - NVPTXMCExpr.cpp - NVPTXReplaceImageHandles.cpp - NVPTXImageOptimizer.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h index 1fb3c57390c2..04969642fd37 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTX_INST_PRINTER_H -#define NVPTX_INST_PRINTER_H +#ifndef LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H +#define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index 16ec19c25f16..a72ae2ef53a7 100644 --- 
a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXBASEINFO_H -#define NVPTXBASEINFO_H +#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H +#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H namespace llvm { diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 366341afe1b8..11d737ec187f 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -25,7 +25,7 @@ static cl::opt<bool> CompileForDebugging("debug-compile", void NVPTXMCAsmInfo::anchor() {} -NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) { +NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) { Triple TheTriple(TT); if (TheTriple.getArch() == Triple::nvptx64) { PointerSize = CalleeSaveStackSlotSize = 8; @@ -33,8 +33,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) { CommentString = "//"; - HasSetDirective = false; - HasSingleParameterDotFile = false; InlineAsmStart = " inline asm"; @@ -52,5 +50,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) { AscizDirective = " .b8"; // @TODO: Can we just disable this? 
+ WeakDirective = "\t// .weak\t"; GlobalDirective = "\t// .globl\t"; } diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h index 7d1633f60d2c..c3242866b177 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTX_MCASM_INFO_H -#define NVPTX_MCASM_INFO_H +#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCASMINFO_H +#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCASMINFO_H #include "llvm/MC/MCAsmInfo.h" @@ -23,8 +23,8 @@ class StringRef; class NVPTXMCAsmInfo : public MCAsmInfo { virtual void anchor(); public: - explicit NVPTXMCAsmInfo(const StringRef &TT); + explicit NVPTXMCAsmInfo(StringRef TT); }; } // namespace llvm -#endif // NVPTX_MCASM_INFO_H +#endif diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h index af95c76f92b2..98821d231378 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXMCTARGETDESC_H -#define NVPTXMCTARGETDESC_H +#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H +#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H namespace llvm { class Target; diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h index f9fb05922920..a2d670f8d39d 100644 --- a/lib/Target/NVPTX/ManagedStringPool.h +++ b/lib/Target/NVPTX/ManagedStringPool.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_MANAGED_STRING_H -#define LLVM_SUPPORT_MANAGED_STRING_H +#ifndef LLVM_LIB_TARGET_NVPTX_MANAGEDSTRINGPOOL_H +#define LLVM_LIB_TARGET_NVPTX_MANAGEDSTRINGPOOL_H #include 
"llvm/ADT/SmallVector.h" #include <string> diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index e74c808f8554..a3382eb00003 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_NVPTX_H -#define LLVM_TARGET_NVPTX_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H +#define LLVM_LIB_TARGET_NVPTX_NVPTX_H #include "MCTargetDesc/NVPTXBaseInfo.h" #include "llvm/ADT/StringMap.h" @@ -59,8 +59,9 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) { llvm_unreachable("Unknown condition code"); } -FunctionPass * -createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); +ImmutablePass *createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM); +FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM, + llvm::CodeGenOpt::Level OptLevel); ModulePass *createNVPTXAssignValidGlobalNamesPass(); ModulePass *createGenericToNVVMPass(); FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass(); @@ -69,6 +70,7 @@ ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping); MachineFunctionPass *createNVPTXPrologEpilogPass(); MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); FunctionPass *createNVPTXImageOptimizerPass(); +FunctionPass *createNVPTXLowerStructArgsPass(); bool isImageOrSamplerVal(const Value *, const Module *); diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h index 5b610687e391..69fc86e75414 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTX_ALLOCA_HOISTING_H_ -#define NVPTX_ALLOCA_HOISTING_H_ +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include 
"llvm/IR/DataLayout.h" @@ -47,4 +47,4 @@ extern FunctionPass *createAllocaHoisting(); } // end namespace llvm -#endif // NVPTX_ALLOCA_HOISTING_H_ +#endif diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 187b88c1d54a..beec9b22921d 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -17,8 +17,8 @@ #include "MCTargetDesc/NVPTXMCAsmInfo.h" #include "NVPTX.h" #include "NVPTXInstrInfo.h" -#include "NVPTXMachineFunctionInfo.h" #include "NVPTXMCExpr.h" +#include "NVPTXMachineFunctionInfo.h" #include "NVPTXRegisterInfo.h" #include "NVPTXTargetMachine.h" #include "NVPTXUtilities.h" @@ -88,12 +88,9 @@ void VisitGlobalVariableForEmission( return; // Do we have a circular dependency? - if (Visiting.count(GV)) + if (!Visiting.insert(GV).second) report_fatal_error("Circular dependency found in global variable set"); - // Start visiting this global - Visiting.insert(GV); - // Make sure we visit all dependents first DenseSet<const GlobalVariable *> Others; for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) @@ -111,159 +108,6 @@ void VisitGlobalVariableForEmission( } } -// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we -// cannot just link to the existing version. -/// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 
-/// -using namespace nvptx; -const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { - MCContext &Ctx = AP.OutContext; - - if (CV->isNullValue() || isa<UndefValue>(CV)) - return MCConstantExpr::Create(0, Ctx); - - if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) - return MCConstantExpr::Create(CI->getZExtValue(), Ctx); - - if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) - return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx); - - if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) - return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); - - const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); - if (!CE) - llvm_unreachable("Unknown constant value to lower!"); - - switch (CE->getOpcode()) { - default: - // If the code isn't optimized, there may be outstanding folding - // opportunities. Attempt to fold the expression using DataLayout as a - // last resort before giving up. - if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) - if (C != CE) - return LowerConstant(C, AP); - - // Otherwise report the problem to the user. - { - std::string S; - raw_string_ostream OS(S); - OS << "Unsupported expression in static initializer: "; - CE->printAsOperand(OS, /*PrintType=*/ false, - !AP.MF ? nullptr : AP.MF->getFunction()->getParent()); - report_fatal_error(OS.str()); - } - case Instruction::AddrSpaceCast: { - // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be - // handled by the generic() logic in the MCExpr printer - PointerType *DstTy = cast<PointerType>(CE->getType()); - PointerType *SrcTy = cast<PointerType>(CE->getOperand(0)->getType()); - if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) { - return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP); - } - std::string S; - raw_string_ostream OS(S); - OS << "Unsupported expression in static initializer: "; - CE->printAsOperand(OS, /*PrintType=*/ false, - !AP.MF ? 
nullptr : AP.MF->getFunction()->getParent()); - report_fatal_error(OS.str()); - } - case Instruction::GetElementPtr: { - const DataLayout &TD = *AP.TM.getDataLayout(); - // Generate a symbolic expression for the byte address - APInt OffsetAI(TD.getPointerSizeInBits(), 0); - cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); - - const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); - if (!OffsetAI) - return Base; - - int64_t Offset = OffsetAI.getSExtValue(); - return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), - Ctx); - } - - case Instruction::Trunc: - // We emit the value and depend on the assembler to truncate the generated - // expression properly. This is important for differences between - // blockaddress labels. Since the two labels are in the same function, it - // is reasonable to treat their delta as a 32-bit value. - // FALL THROUGH. - case Instruction::BitCast: - return LowerConstant(CE->getOperand(0), AP); - - case Instruction::IntToPtr: { - const DataLayout &TD = *AP.TM.getDataLayout(); - // Handle casts to pointers by changing them into casts to the appropriate - // integer type. This promotes constant folding and simplifies this code. - Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), - false /*ZExt*/); - return LowerConstant(Op, AP); - } - - case Instruction::PtrToInt: { - const DataLayout &TD = *AP.TM.getDataLayout(); - // Support only foldable casts to/from pointers that can be eliminated by - // changing the pointer to the appropriately sized integer type. - Constant *Op = CE->getOperand(0); - Type *Ty = CE->getType(); - - const MCExpr *OpExpr = LowerConstant(Op, AP); - - // We can emit the pointer value into this slot if the slot is an - // integer slot equal to the size of the pointer. 
- if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) - return OpExpr; - - // Otherwise the pointer is smaller than the resultant integer, mask off - // the high bits so we are sure to get a proper truncation if the input is - // a constant expr. - unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); - const MCExpr *MaskExpr = - MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx); - return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); - } - - // The MC library also has a right-shift operator, but it isn't consistently - // signed or unsigned between different targets. - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - case Instruction::SDiv: - case Instruction::SRem: - case Instruction::Shl: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); - const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); - switch (CE->getOpcode()) { - default: - llvm_unreachable("Unknown binary operator constant cast expr"); - case Instruction::Add: - return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); - case Instruction::Sub: - return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); - case Instruction::Mul: - return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); - case Instruction::SDiv: - return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); - case Instruction::SRem: - return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); - case Instruction::Shl: - return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); - case Instruction::And: - return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); - case Instruction::Or: - return MCBinaryExpr::CreateOr(LHS, RHS, Ctx); - case Instruction::Xor: - return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); - } - } - } -} - void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { if (!EmitLineNumbers) return; @@ -502,8 +346,8 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { } void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) 
{ - const DataLayout *TD = TM.getDataLayout(); - const TargetLowering *TLI = TM.getTargetLowering(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); + const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); Type *Ty = F->getReturnType(); @@ -530,17 +374,15 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { } else if (isa<PointerType>(Ty)) { O << ".param .b" << TLI->getPointerTy().getSizeInBits() << " func_retval0"; - } else { - if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { - unsigned totalsz = TD->getTypeAllocSize(Ty); - unsigned retAlignment = 0; - if (!llvm::getAlign(*F, 0, retAlignment)) - retAlignment = TD->getABITypeAlignment(Ty); - O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz - << "]"; - } else - assert(false && "Unknown return type"); - } + } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { + unsigned totalsz = TD->getTypeAllocSize(Ty); + unsigned retAlignment = 0; + if (!llvm::getAlign(*F, 0, retAlignment)) + retAlignment = TD->getABITypeAlignment(Ty); + O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz + << "]"; + } else + llvm_unreachable("Unknown return type"); } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*TLI, Ty, vtparts); @@ -626,13 +468,14 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() { void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); if (TRI->isVirtualRegister(RegNo)) { OutStreamer.AddComment(Twine("implicit-def: ") + getVirtualRegisterName(RegNo)); } else { - OutStreamer.AddComment(Twine("implicit-def: ") + - TM.getRegisterInfo()->getName(RegNo)); + OutStreamer.AddComment( + Twine("implicit-def: ") + + TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo)); } 
OutStreamer.AddBlankLine(); } @@ -794,11 +637,6 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) { return false; } - if (const MDNode *md = dyn_cast<MDNode>(U)) - if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || - (md->getName().str() == "llvm.dbg.sp"))) - return true; - for (const User *UU : U->users()) if (usedInOneFunc(UU, oneFunc) == false) return false; @@ -953,7 +791,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) .Initialize(OutContext, TM); - Mang = new Mangler(TM.getDataLayout()); + Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout()); // Emit header before any dwarf directives are emitted below. emitHeader(M, OS1); @@ -1154,7 +992,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, GVar->getName().startswith("nvvm.")) return; - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); // GlobalVariables are always constant pointers themselves. const PointerType *PTy = GVar->getType(); @@ -1288,7 +1126,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, else O << " .align " << GVar->getAlignment(); - if (ETy->isSingleValueType()) { + if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { O << " ."; // Special case: ABI requires that we use .u8 for predicates if (ETy->isIntegerTy(1)) @@ -1457,7 +1295,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); // GlobalVariables are always constant pointers themselves. 
const PointerType *PTy = GVar->getType(); @@ -1470,7 +1308,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, else O << " .align " << GVar->getAlignment(); - if (ETy->isSingleValueType()) { + if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { O << " ."; O << getPTXFundamentalTypeStr(ETy); O << " "; @@ -1509,17 +1347,6 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { if (ATy) return getOpenCLAlignment(TD, ATy->getElementType()); - const VectorType *VTy = dyn_cast<VectorType>(Ty); - if (VTy) { - Type *ETy = VTy->getElementType(); - unsigned int numE = VTy->getNumElements(); - unsigned int alignE = TD->getPrefTypeAlignment(ETy); - if (numE == 3) - return 4 * alignE; - else - return numE * alignE; - } - const StructType *STy = dyn_cast<StructType>(Ty); if (STy) { unsigned int alignStruct = 1; @@ -1578,9 +1405,9 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { } void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); const AttributeSet &PAL = F->getAttributes(); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); Function::const_arg_iterator I, E; unsigned paramIndex = 0; bool first = true; @@ -1771,7 +1598,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( // Map the global virtual register number to a register class specific // virtual register number starting from 1 with that class. 
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); //unsigned numRegClasses = TRI->getNumRegClasses(); // Emit the Fake Stack Object @@ -1901,7 +1728,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { } return; } else { - O << *LowerConstant(CPV, *this); + O << *lowerConstant(CPV); return; } } @@ -1911,7 +1738,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); if (isa<UndefValue>(CPV) || CPV->isNullValue()) { int s = TD->getTypeAllocSize(CPV->getType()); @@ -2035,7 +1862,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, AggBuffer *aggBuffer) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); int Bytes; // Old constants diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index a9f9bdd6d3d8..c11b5793b22a 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXASMPRINTER_H -#define NVPTXASMPRINTER_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H #include "NVPTX.h" #include "NVPTXSubtarget.h" @@ -39,13 +39,6 @@ // A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer // (subclass of MCStreamer). -// This is defined in AsmPrinter.cpp. -// Used to process the constant expressions in initializers. 
-namespace nvptx { -const llvm::MCExpr * -LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP); -} - namespace llvm { class LineReader { @@ -86,13 +79,13 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { // Once we have this AggBuffer setup, we can choose how to print // it out. public: - unsigned size; // size of the buffer in bytes - unsigned char *buffer; // the buffer unsigned numSymbols; // number of symbol addresses - SmallVector<unsigned, 4> symbolPosInBuffer; - SmallVector<const Value *, 4> Symbols; private: + const unsigned size; // size of the buffer in bytes + std::vector<unsigned char> buffer; // the buffer + SmallVector<unsigned, 4> symbolPosInBuffer; + SmallVector<const Value *, 4> Symbols; unsigned curpos; raw_ostream &O; NVPTXAsmPrinter &AP; @@ -100,14 +93,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { public: AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP) - : O(_O), AP(_AP) { - buffer = new unsigned char[_size]; - size = _size; + : size(_size), buffer(_size), O(_O), AP(_AP) { curpos = 0; numSymbols = 0; EmitGeneric = AP.EmitGeneric; } - ~AggBuffer() { delete[] buffer; } unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) { assert((curpos + Num) <= size); assert((curpos + Bytes) <= size); @@ -170,7 +160,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { O << *Name; } } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) { - O << *nvptx::LowerConstant(Cexpr, AP); + O << *AP.lowerConstant(Cexpr); } else llvm_unreachable("symbol type unknown"); nSym++; @@ -179,9 +169,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { else nextSymbolPos = symbolPosInBuffer[nSym]; } else if (nBytes == 4) - O << *(unsigned int *)(buffer + pos); + O << *(unsigned int *)(&buffer[pos]); else - O << *(unsigned long long *)(buffer + pos); + O << *(unsigned long long *)(&buffer[pos]); } } } diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp 
b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 8b088412dbba..314df3828b88 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -48,20 +48,20 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { if (is64bit) { unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass); MachineInstr *MI = - BuildMI(MBB, MBBI, dl, - MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes_64), + BuildMI(MBB, MBBI, dl, MF.getSubtarget().getInstrInfo()->get( + NVPTX::cvta_local_yes_64), NVPTX::VRFrame).addReg(LocalReg); BuildMI(MBB, MI, dl, - MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64), + MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64), LocalReg).addImm(MF.getFunctionNumber()); } else { unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass); MachineInstr *MI = BuildMI(MBB, MBBI, dl, - MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes), + MF.getSubtarget().getInstrInfo()->get(NVPTX::cvta_local_yes), NVPTX::VRFrame).addReg(LocalReg); BuildMI(MBB, MI, dl, - MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR), + MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR), LocalReg).addImm(MF.getFunctionNumber()); } } diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index 56fb673de0eb..0846b78d58e5 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTX_FRAMELOWERING_H -#define NVPTX_FRAMELOWERING_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H #include "llvm/Target/TargetFrameLowering.h" diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index faa9fdb424b6..5f06d9a791a1 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ 
b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/ValueMap.h" #include "llvm/PassManager.h" +#include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; @@ -54,8 +55,7 @@ private: IRBuilder<> &Builder); Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C, IRBuilder<> &Builder); - void remapNamedMDNode(Module *M, NamedMDNode *N); - MDNode *remapMDNode(Module *M, MDNode *N); + void remapNamedMDNode(ValueToValueMapTy &VM, NamedMDNode *N); typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy; typedef ValueMap<Constant *, Value *> ConstantToValueMapTy; @@ -125,12 +125,17 @@ bool GenericToNVVM::runOnModule(Module &M) { ConstantToValueMap.clear(); } + // Copy GVMap over to a standard value map. + ValueToValueMapTy VM; + for (auto I = GVMap.begin(), E = GVMap.end(); I != E; ++I) + VM[I->first] = I->second; + // Walk through the metadata section and update the debug information // associated with the global variables in the default address space. for (Module::named_metadata_iterator I = M.named_metadata_begin(), E = M.named_metadata_end(); I != E; I++) { - remapNamedMDNode(&M, I); + remapNamedMDNode(VM, I); } // Walk through the global variable initializers, and replace any use of @@ -140,20 +145,23 @@ bool GenericToNVVM::runOnModule(Module &M) { for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) { GlobalVariable *GV = I->first; GlobalVariable *NewGV = I->second; - ++I; + + // Remove GV from the map so that it can be RAUWed. Note that + // DenseMap::erase() won't invalidate any iterators but this one. + auto Next = std::next(I); + GVMap.erase(I); + I = Next; + Constant *BitCastNewGV = ConstantExpr::getPointerCast(NewGV, GV->getType()); // At this point, the remaining uses of GV should be found only in global // variable initializers, as other uses have been already been removed // while walking through the instructions in function definitions. 
- for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end(); - UI != UE;) - (UI++)->set(BitCastNewGV); + GV->replaceAllUsesWith(BitCastNewGV); std::string Name = GV->getName(); - GV->removeDeadConstantUsers(); GV->eraseFromParent(); NewGV->setName(Name); } - GVMap.clear(); + assert(GVMap.empty() && "Expected it to be empty by now"); return true; } @@ -359,7 +367,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, } } -void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) { +void GenericToNVVM::remapNamedMDNode(ValueToValueMapTy &VM, NamedMDNode *N) { bool OperandChanged = false; SmallVector<MDNode *, 16> NewOperands; @@ -369,7 +377,7 @@ void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) { // converted to another value. for (unsigned i = 0; i < NumOperands; ++i) { MDNode *Operand = N->getOperand(i); - MDNode *NewOperand = remapMDNode(M, Operand); + MDNode *NewOperand = MapMetadata(Operand, VM); OperandChanged |= Operand != NewOperand; NewOperands.push_back(NewOperand); } @@ -387,47 +395,3 @@ void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) { N->addOperand(*I); } } - -MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) { - - bool OperandChanged = false; - SmallVector<Value *, 8> NewOperands; - unsigned NumOperands = N->getNumOperands(); - - // Check if any operand is or contains a global variable in GVMap, and thus - // converted to another value. - for (unsigned i = 0; i < NumOperands; ++i) { - Value *Operand = N->getOperand(i); - Value *NewOperand = Operand; - if (Operand) { - if (isa<GlobalVariable>(Operand)) { - GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(Operand)); - if (I != GVMap.end()) { - NewOperand = I->second; - if (++i < NumOperands) { - NewOperands.push_back(NewOperand); - // Address space of the global variable follows the global variable - // in the global variable debug info (see createGlobalVariable in - // lib/Analysis/DIBuilder.cpp). 
- NewOperand = - ConstantInt::get(Type::getInt32Ty(M->getContext()), - I->second->getType()->getAddressSpace()); - } - } - } else if (isa<MDNode>(Operand)) { - NewOperand = remapMDNode(M, cast<MDNode>(Operand)); - } - } - OperandChanged |= Operand != NewOperand; - NewOperands.push_back(NewOperand); - } - - // If none of the operands has been modified, return N as it is. - if (!OperandChanged) { - return N; - } - - // If any of the operands has been modified, create a new MDNode with the new - // operands. - return MDNode::get(M->getContext(), makeArrayRef(NewOperands)); -} diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 05205fba1aff..cd0422d78a8c 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -27,7 +27,7 @@ using namespace llvm; static cl::opt<int> UsePrecDivF32( "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" - " IEEE Compliant F32 div.rnd if avaiable."), + " IEEE Compliant F32 div.rnd if available."), cl::init(2)); static cl::opt<bool> @@ -5041,17 +5041,10 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const { const Value *Src = nullptr; - // Even though MemIntrinsicSDNode is a subclas of MemSDNode, - // the classof() for MemSDNode does not include MemIntrinsicSDNode - // (See SelectionDAGNodes.h). So we need to check for both. 
if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { if (spN == 0 && mN->getMemOperand()->getPseudoValue()) return true; Src = mN->getMemOperand()->getValue(); - } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) { - if (spN == 0 && mN->getMemOperand()->getPseudoValue()) - return true; - Src = mN->getMemOperand()->getValue(); } if (!Src) return false; diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index c62fc253c33d..69afcd7320d3 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -11,6 +11,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELDAGTODAG_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXISELDAGTODAG_H + #include "NVPTX.h" #include "NVPTXISelLowering.h" #include "NVPTXRegisterInfo.h" @@ -92,3 +95,5 @@ private: }; } + +#endif diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index d76b20a29eb7..093ba1a2b824 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -106,8 +106,8 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, } // NVPTXTargetLowering Constructor. 
-NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) - : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), +NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) + : TargetLowering(TM), nvTM(&TM), nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { // always lower memset, memcpy, and memmove intrinsics to load/store @@ -203,8 +203,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); // Turn FP extload into load/fextend - setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); // Turn FP truncstore into trunc + store. setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); @@ -214,12 +215,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setOperationAction(ISD::LOAD, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setTruncStoreAction(MVT::i64, MVT::i1, Expand); - setTruncStoreAction(MVT::i32, MVT::i1, Expand); - setTruncStoreAction(MVT::i16, MVT::i1, Expand); - setTruncStoreAction(MVT::i8, MVT::i1, Expand); + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setTruncStoreAction(VT, MVT::i1, Expand); + } // This is legal in NVPTX setOperationAction(ISD::ConstantFP, MVT::f64, Legal); @@ -232,9 +232,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setOperationAction(ISD::ADDE, MVT::i64, Expand); // Register custom handling for vector loads/stores - for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= 
MVT::LAST_VECTOR_VALUETYPE; - ++i) { - MVT VT = (MVT::SimpleValueType) i; + for (MVT VT : MVT::vector_valuetypes()) { if (IsPTXVectorType(VT)) { setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); @@ -905,16 +903,14 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, O << ".param .b" << size << " _"; } else if (isa<PointerType>(retTy)) { O << ".param .b" << getPointerTy().getSizeInBits() << " _"; + } else if ((retTy->getTypeID() == Type::StructTyID) || + isa<VectorType>(retTy)) { + O << ".param .align " + << retAlignment + << " .b8 _[" + << getDataLayout()->getTypeAllocSize(retTy) << "]"; } else { - if((retTy->getTypeID() == Type::StructTyID) || - isa<VectorType>(retTy)) { - O << ".param .align " - << retAlignment - << " .b8 _[" - << getDataLayout()->getTypeAllocSize(retTy) << "]"; - } else { - assert(false && "Unknown return type"); - } + llvm_unreachable("Unknown return type"); } O << ") "; } @@ -1355,7 +1351,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // .param .align 16 .b8 retval0[<size-in-bytes>], or // .param .b<size-in-bits> retval0 unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); - if (retTy->isSingleValueType()) { + // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for + // these three types to match the logic in + // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. + // Plus, this behavior is consistent with nvcc's. 
+ if (retTy->isFloatingPointTy() || retTy->isIntegerTy() || + retTy->isPointerTy()) { // Scalar needs to be at least 32bit wide if (resultsz < 32) resultsz = 32; @@ -1451,8 +1452,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT ObjectVT = getValueType(retTy); unsigned NumElts = ObjectVT.getVectorNumElements(); EVT EltVT = ObjectVT.getVectorElementType(); - assert(nvTM->getTargetLowering()->getNumRegisters(F->getContext(), - ObjectVT) == NumElts && + assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters( + F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized"); unsigned sz = EltVT.getSizeInBits(); bool needTruncate = sz < 8 ? true : false; @@ -2028,7 +2029,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( const Function *F = MF.getFunction(); const AttributeSet &PAL = F->getAttributes(); - const TargetLowering *TLI = DAG.getTarget().getTargetLowering(); + const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering(); SDValue Root = DAG.getRoot(); std::vector<SDValue> OutChains; @@ -2142,7 +2143,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( ISD::SEXTLOAD : ISD::ZEXTLOAD; p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr, MachinePointerInfo(srcValue), partVT, false, - false, partAlign); + false, false, partAlign); } else { p = DAG.getLoad(partVT, dl, Root, srcAddr, MachinePointerInfo(srcValue), false, false, false, @@ -2163,7 +2164,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( unsigned NumElts = ObjectVT.getVectorNumElements(); assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized"); - unsigned Ofst = 0; EVT EltVT = ObjectVT.getVectorElementType(); // V1 load @@ -2172,10 +2172,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // We only have one element, so just directly load it Value *SrcValue = Constant::getNullValue(PointerType::get( EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); - SDValue 
SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(Ofst, getPointerTy())); SDValue P = DAG.getLoad( - EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, + EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, true, TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); if (P.getNode()) @@ -2184,7 +2182,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P); InVals.push_back(P); - Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext())); ++InsIdx; } else if (NumElts == 2) { // V2 load @@ -2192,10 +2189,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2); Value *SrcValue = Constant::getNullValue(PointerType::get( VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); - SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(Ofst, getPointerTy())); SDValue P = DAG.getLoad( - VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, + VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, true, TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); if (P.getNode()) @@ -2213,7 +2208,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( InVals.push_back(Elt0); InVals.push_back(Elt1); - Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); InsIdx += 2; } else { // V4 loads @@ -2231,6 +2225,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( VecSize = 2; } EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); + unsigned Ofst = 0; for (unsigned i = 0; i < NumElts; i += VecSize) { Value *SrcValue = Constant::getNullValue( PointerType::get(VecVT.getTypeForEVT(F->getContext()), @@ -2275,6 +2270,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( ISD::SEXTLOAD : ISD::ZEXTLOAD; p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg, 
MachinePointerInfo(srcValue), ObjectVT, false, false, + false, TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); } else { p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, @@ -3269,16 +3265,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.vol = 0; Info.readMem = true; Info.writeMem = false; - - // alignment is available as metadata. - // Grab it and set the alignment. - assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata"); - MDNode *AlignMD = I.getMetadata("align"); - assert(AlignMD && "Must have a non-null MDNode"); - assert(AlignMD->getNumOperands() == 1 && "Must have a single operand"); - Value *Align = AlignMD->getOperand(0); - int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue(); - Info.align = Alignment; + Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); return true; } @@ -3298,16 +3285,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.vol = 0; Info.readMem = true; Info.writeMem = false; - - // alignment is available as metadata. - // Grab it and set the alignment. 
- assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata"); - MDNode *AlignMD = I.getMetadata("align"); - assert(AlignMD && "Must have a non-null MDNode"); - assert(AlignMD->getNumOperands() == 1 && "Must have a single operand"); - Value *Align = AlignMD->getOperand(0); - int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue(); - Info.align = Alignment; + Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); return true; } @@ -3866,8 +3844,8 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, } else if (N0.getOpcode() == ISD::FMUL) { if (VT == MVT::f32 || VT == MVT::f64) { - NVPTXTargetLowering *TLI = - (NVPTXTargetLowering *)&DAG.getTargetLoweringInfo(); + const auto *TLI = static_cast<const NVPTXTargetLowering *>( + &DAG.getTargetLoweringInfo()); if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel)) return SDValue(); @@ -4053,13 +4031,13 @@ static bool IsMulWideOperandDemotable(SDValue Op, if (Op.getOpcode() == ISD::SIGN_EXTEND || Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { EVT OrigVT = Op.getOperand(0).getValueType(); - if (OrigVT.getSizeInBits() == OptSize) { + if (OrigVT.getSizeInBits() <= OptSize) { S = Signed; return true; } } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { EVT OrigVT = Op.getOperand(0).getValueType(); - if (OrigVT.getSizeInBits() == OptSize) { + if (OrigVT.getSizeInBits() <= OptSize) { S = Unsigned; return true; } @@ -4514,3 +4492,10 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { delete DwarfRangesSection; delete DwarfMacroInfoSection; } + +const MCSection * +NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + return getDataSection(); +} diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index bef6ed9faad6..b3fea3f4a36a 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -12,8 +12,8 @@ // 
//===----------------------------------------------------------------------===// -#ifndef NVPTXISELLOWERING_H -#define NVPTXISELLOWERING_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H #include "NVPTX.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -436,7 +436,7 @@ class NVPTXSubtarget; //===--------------------------------------------------------------------===// class NVPTXTargetLowering : public TargetLowering { public: - explicit NVPTXTargetLowering(NVPTXTargetMachine &TM); + explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM); SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; @@ -495,7 +495,7 @@ public: std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - NVPTXTargetMachine *nvTM; + const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } @@ -505,9 +505,9 @@ public: bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const; - virtual bool isFMAFasterThanFMulAndFAdd(EVT) const { - return true; - } + bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; } + + bool enableAggressiveFMAFusion(EVT VT) const override { return true; } private: const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here @@ -538,4 +538,4 @@ private: }; } // namespace llvm -#endif // NVPTXISELLOWERING_H +#endif diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp index a98fb37f6e25..aa36b6be7250 100644 --- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp +++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp @@ -16,11 +16,11 @@ #include "NVPTX.h" #include "NVPTXUtilities.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/Analysis/ConstantFolding.h" using 
namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index b5b4fbed0799..740ca0328efe 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -14,11 +14,11 @@ #include "NVPTX.h" #include "NVPTXInstrInfo.h" #include "NVPTXTargetMachine.h" -#include "llvm/IR/Function.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h index 2ac29748676a..6de75364a823 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXINSTRUCTIONINFO_H -#define NVPTXINSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXINSTRINFO_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXINSTRINFO_H #include "NVPTX.h" #include "NVPTXRegisterInfo.h" diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 9900b8c8433f..2c571c4878a8 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -296,7 +296,7 @@ multiclass F2<string OpcStr, SDNode OpNode> { // General Type Conversion //----------------------------------- -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { // Generate a cvt to the given type from all possible types. // Each instance takes a CvtMode immediate that defines the conversion mode to // use. It can be CvtNONE to omit a conversion mode. 
@@ -2094,7 +2094,7 @@ multiclass LD<NVPTXRegClass regclass> { "$fromWidth \t$dst, [$addr+$offset];"), []>; } -let mayLoad=1, neverHasSideEffects=1 in { +let mayLoad=1, hasSideEffects=0 in { defm LD_i8 : LD<Int16Regs>; defm LD_i16 : LD<Int16Regs>; defm LD_i32 : LD<Int32Regs>; @@ -2136,7 +2136,7 @@ multiclass ST<NVPTXRegClass regclass> { " \t[$addr+$offset], $src;"), []>; } -let mayStore=1, neverHasSideEffects=1 in { +let mayStore=1, hasSideEffects=0 in { defm ST_i8 : ST<Int16Regs>; defm ST_i16 : ST<Int16Regs>; defm ST_i32 : ST<Int32Regs>; @@ -2220,7 +2220,7 @@ multiclass LD_VEC<NVPTXRegClass regclass> { "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), []>; } -let mayLoad=1, neverHasSideEffects=1 in { +let mayLoad=1, hasSideEffects=0 in { defm LDV_i8 : LD_VEC<Int16Regs>; defm LDV_i16 : LD_VEC<Int16Regs>; defm LDV_i32 : LD_VEC<Int32Regs>; @@ -2303,7 +2303,7 @@ multiclass ST_VEC<NVPTXRegClass regclass> { "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), []>; } -let mayStore=1, neverHasSideEffects=1 in { +let mayStore=1, hasSideEffects=0 in { defm STV_i8 : ST_VEC<Int16Regs>; defm STV_i16 : ST_VEC<Int16Regs>; defm STV_i32 : ST_VEC<Int32Regs>; diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index 5ec1fc969687..8759406a6803 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTX_LOWER_AGGR_COPIES_H -#define NVPTX_LOWER_AGGR_COPIES_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/IR/DataLayout.h" diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp new file mode 100644 index 000000000000..3149399afb30 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp @@ -0,0 
+1,134 @@ +//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory =====--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Copy struct args to local memory. This is needed for kernel functions only. +// This is a preparation for handling cases like +// +// kernel void foo(struct A arg, ...) +// { +// struct A *p = &arg; +// ... +// ... = p->field1 ... (there is no generic address for .param) +// p->field2 = ... (there is no write access to .param) +// } +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXUtilities.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace llvm { +void initializeNVPTXLowerStructArgsPass(PassRegistry &); +} + +class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass { + bool runOnFunction(Function &F) override; + + void handleStructPtrArgs(Function &); + void handleParam(Argument *); + +public: + static char ID; // Pass identification, replacement for typeid + NVPTXLowerStructArgs() : FunctionPass(ID) {} + const char *getPassName() const override { + return "Copy structure (byval *) arguments to stack"; + } +}; + +char NVPTXLowerStructArgs::ID = 1; + +INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args", + "Lower structure arguments (NVPTX)", false, false) + +void NVPTXLowerStructArgs::handleParam(Argument *Arg) { + Function *Func = Arg->getParent(); + Instruction *FirstInst = &(Func->getEntryBlock().front()); + PointerType *PType = dyn_cast<PointerType>(Arg->getType()); + + assert(PType && "Expecting pointer type in handleParam"); + + Type *StructType =
PType->getElementType(); + AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst); + + /* Set the alignment to alignment of the byval parameter. This is because, + * later load/stores assume that alignment, and we are going to replace + * the use of the byval parameter with this alloca instruction. + */ + AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1)); + + Arg->replaceAllUsesWith(AllocA); + + // Get the cvt.gen.to.param intrinsic + Type *CvtTypes[] = { + Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM), + Type::getInt8PtrTy(Func->getParent()->getContext(), + ADDRESS_SPACE_GENERIC)}; + Function *CvtFunc = Intrinsic::getDeclaration( + Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param, CvtTypes); + + Value *BitcastArgs[] = { + new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(), + ADDRESS_SPACE_GENERIC), + Arg->getName(), FirstInst)}; + CallInst *CallCVT = + CallInst::Create(CvtFunc, BitcastArgs, "cvt_to_param", FirstInst); + + BitCastInst *BitCast = new BitCastInst( + CallCVT, PointerType::get(StructType, ADDRESS_SPACE_PARAM), + Arg->getName(), FirstInst); + LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst); + new StoreInst(LI, AllocA, FirstInst); +} + +// ============================================================================= +// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then +// add the following instructions to the first basic block : +// +// %temp = alloca %struct.x, align 8 +// %tt1 = bitcast %struct.x * %d to i8 * +// %tt2 = llvm.nvvm.cvt.gen.to.param %tt1 +// %tempd = bitcast i8 addrspace(101) * %tt2 to %struct.x addrspace(101) * +// %tv = load %struct.x addrspace(101) * %tempd +// store %struct.x %tv, %struct.x * %temp, align 8 +// +// The above code allocates some space in the stack and copies the incoming +// struct from param space to local space. +// Then replace all occurrences of %d by %temp.
+// ============================================================================= +void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) { + for (Argument &Arg : F.args()) { + if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) { + handleParam(&Arg); + } + } +} + +// ============================================================================= +// Main function for this pass. +// ============================================================================= +bool NVPTXLowerStructArgs::runOnFunction(Function &F) { + // Skip non-kernels. See the comments at the top of this file. + if (!isKernelFunction(F)) + return false; + + handleStructPtrArgs(F); + return true; +} + +FunctionPass *llvm::createNVPTXLowerStructArgsPass() { + return new NVPTXLowerStructArgs(); +} diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h index 554764930a9e..d39a394fe750 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/lib/Target/NVPTX/NVPTXMCExpr.h @@ -9,8 +9,8 @@ // Modeled after ARMMCExpr -#ifndef NVPTXMCEXPR_H -#define NVPTXMCEXPR_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMCEXPR_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXMCEXPR_H #include "llvm/ADT/APFloat.h" #include "llvm/MC/MCExpr.h" @@ -63,7 +63,8 @@ public: void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const override { + const MCAsmLayout *Layout, + const MCFixup *Fixup) const override { return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h index 67fb39050797..10f1135ad841 100644 --- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h +++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H + #include 
"llvm/CodeGen/MachineFunction.h" namespace llvm { @@ -44,3 +47,5 @@ public: } }; } + +#endif diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index 348ab0c4bf14..a1e1b9e74480 100644 --- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -48,8 +49,8 @@ char NVPTXPrologEpilogPass::ID = 0; bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering &TFI = *TM.getFrameLowering(); - const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering(); + const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo(); bool Modified = false; calculateFrameObjectOffsets(MF); @@ -108,8 +109,8 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, void NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) { - const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index a7594be121a0..d2e67331f788 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXREGISTERINFO_H -#define NVPTXREGISTERINFO_H +#ifndef 
LLVM_LIB_TARGET_NVPTX_NVPTXREGISTERINFO_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXREGISTERINFO_H #include "ManagedStringPool.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index 20d4e272341e..b7f53c7929d1 100644 --- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -16,11 +16,11 @@ #include "NVPTX.h" #include "NVPTXMachineFunctionInfo.h" #include "NVPTXSubtarget.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseSet.h" using namespace llvm; @@ -33,9 +33,9 @@ private: public: NVPTXReplaceImageHandles(); - bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "NVPTX Replace Image Handles"; } private: diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h index aa0436bf0da7..f1d3cb4da51b 100644 --- a/lib/Target/NVPTX/NVPTXSection.h +++ b/lib/Target/NVPTX/NVPTXSection.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_NVPTXSECTION_H -#define LLVM_NVPTXSECTION_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSECTION_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXSECTION_H #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCSection.h" diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index d5cded218362..3d52532310ff 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -59,7 +59,8 @@ NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0), SmVersion(20), 
DL(computeDataLayout(is64Bit)), InstrInfo(initializeSubtargetDependencies(CPU, FS)), - TLInfo((NVPTXTargetMachine &)TM), TSInfo(&DL), FrameLowering(*this) { + TLInfo((const NVPTXTargetMachine &)TM), TSInfo(&DL), + FrameLowering(*this) { Triple T(TT); diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index 4c41e4e470dd..fb2d4047631a 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXSUBTARGET_H -#define NVPTXSUBTARGET_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H #include "NVPTX.h" #include "NVPTXFrameLowering.h" @@ -57,14 +57,20 @@ public: NVPTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM, bool is64Bit); - const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; } - const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } - const DataLayout *getDataLayout() const { return &DL; } - const NVPTXRegisterInfo *getRegisterInfo() const { + const TargetFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const DataLayout *getDataLayout() const override { return &DL; } + const NVPTXRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } - const NVPTXTargetLowering *getTargetLowering() const { return &TLInfo; } - const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + const NVPTXTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } bool hasBrkPt() const { return SmVersion >= 11; } bool hasAtomRedG32() const { return SmVersion >= 11; } @@ -113,4 +119,4 @@ public: } // End llvm namespace 
-#endif // NVPTXSUBTARGET_H +#endif diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 069a1b9966f0..c7f95071b9fc 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -16,6 +16,7 @@ #include "NVPTX.h" #include "NVPTXAllocaHoisting.h" #include "NVPTXLowerAggrCopies.h" +#include "NVPTXTargetObjectFile.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -50,6 +51,7 @@ void initializeNVVMReflectPass(PassRegistry&); void initializeGenericToNVVMPass(PassRegistry&); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); +void initializeNVPTXLowerStructArgsPass(PassRegistry &); } extern "C" void LLVMInitializeNVPTXTarget() { @@ -64,6 +66,7 @@ extern "C" void LLVMInitializeNVPTXTarget() { initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); initializeNVPTXFavorNonGenericAddrSpacesPass( *PassRegistry::getPassRegistry()); + initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry()); } NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, @@ -72,10 +75,13 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + TLOF(make_unique<NVPTXTargetObjectFile>()), Subtarget(TT, CPU, FS, *this, is64bit) { initAsmInfo(); } +NVPTXTargetMachine::~NVPTXTargetMachine() {} + void NVPTXTargetMachine32::anchor() {} NVPTXTargetMachine32::NVPTXTargetMachine32( @@ -104,8 +110,7 @@ public: void addIRPasses() override; bool addInstSelector() override; - bool addPreRegAlloc() override; - bool addPostRegAlloc() override; + void addPostRegAlloc() override; void addMachineSSAOptimization() override; FunctionPass *createTargetRegisterAllocator(bool) override; @@ 
-119,6 +124,14 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { return PassConfig; } +void NVPTXTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our NVPTX pass. This + // allows the NVPTX pass to delegate to the target independent layer when + // appropriate. + PM.add(createBasicTargetTransformInfoPass(this)); + PM.add(createNVPTXTargetTransformInfoPass(this)); +} + void NVPTXPassConfig::addIRPasses() { // The following passes are known to not play well with virtual regs hanging // around after register allocation (which in our case, is *all* registers). @@ -169,10 +182,8 @@ bool NVPTXPassConfig::addInstSelector() { return false; } -bool NVPTXPassConfig::addPreRegAlloc() { return false; } -bool NVPTXPassConfig::addPostRegAlloc() { - addPass(createNVPTXPrologEpilogPass()); - return false; +void NVPTXPassConfig::addPostRegAlloc() { + addPass(createNVPTXPrologEpilogPass(), false); } FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index a7a1c8f4e171..fa97ec8dfe2d 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTX_TARGETMACHINE_H -#define NVPTX_TARGETMACHINE_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H -#include "NVPTXSubtarget.h" #include "ManagedStringPool.h" +#include "NVPTXSubtarget.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSelectionDAGInfo.h" @@ -25,6 +25,7 @@ namespace llvm { /// NVPTXTargetMachine /// class NVPTXTargetMachine : public LLVMTargetMachine { + std::unique_ptr<TargetLoweringObjectFile> TLOF; NVPTXSubtarget Subtarget; // Hold Strings that can be free'd all 
together with NVPTXTargetMachine @@ -35,27 +36,9 @@ public: const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit); - const TargetFrameLowering *getFrameLowering() const override { - return getSubtargetImpl()->getFrameLowering(); - } - const NVPTXInstrInfo *getInstrInfo() const override { - return getSubtargetImpl()->getInstrInfo(); - } - const DataLayout *getDataLayout() const override { - return getSubtargetImpl()->getDataLayout(); - } - const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; } - const NVPTXRegisterInfo *getRegisterInfo() const override { - return getSubtargetImpl()->getRegisterInfo(); - } - - const NVPTXTargetLowering *getTargetLowering() const override { - return getSubtargetImpl()->getTargetLowering(); - } + ~NVPTXTargetMachine() override; - const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { - return getSubtargetImpl()->getSelectionDAGInfo(); - } + const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; } ManagedStringPool *getManagedStrPool() const { return const_cast<ManagedStringPool *>(&ManagedStrPool); @@ -63,17 +46,17 @@ public: TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - // Emission of machine code through JITCodeEmitter is not supported. - bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, - bool = true) override { - return true; - } - // Emission of machine code through MCJIT is not supported. bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, bool = true) override { return true; } + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } + + /// \brief Register NVPTX analysis passes with a pass manager. + void addAnalysisPasses(PassManagerBase &PM) override; }; // NVPTXTargetMachine. 
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h index ba8086d78880..00ceca50a9f2 100644 --- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H -#define LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETOBJECTFILE_H #include "NVPTXSection.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -98,6 +98,9 @@ public: return DataSection; } + const MCSection * + SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const override; }; } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp new file mode 100644 index 000000000000..b09d0d424f55 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -0,0 +1,115 @@ +//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// \file +// This file implements a TargetTransformInfo analysis pass specific to the +// NVPTX target machine. It uses the target's detailed information to provide +// more precise answers to certain TTI queries, while letting the target +// independent and default TTI implementations handle the rest. 
+// +//===----------------------------------------------------------------------===// + +#include "NVPTXTargetMachine.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +#define DEBUG_TYPE "NVPTXtti" + +// Declare the pass initialization routine locally as target-specific passes +// don't have a target-wide initialization entry point, and so we rely on the +// pass constructor initialization. +namespace llvm { +void initializeNVPTXTTIPass(PassRegistry &); +} + +namespace { + +class NVPTXTTI final : public ImmutablePass, public TargetTransformInfo { + const NVPTXTargetLowering *TLI; +public: + NVPTXTTI() : ImmutablePass(ID), TLI(nullptr) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + NVPTXTTI(const NVPTXTargetMachine *TM) + : ImmutablePass(ID), TLI(TM->getSubtargetImpl()->getTargetLowering()) { + initializeNVPTXTTIPass(*PassRegistry::getPassRegistry()); + } + + void initializePass() override { pushTTIStack(this); } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. 
+ void *getAdjustedAnalysisPointer(const void *ID) override { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo *)this; + return this; + } + + bool hasBranchDivergence() const override; + + unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, + OperandValueKind Opd2Info = OK_AnyValue, + OperandValueProperties Opd1PropInfo = OP_None, + OperandValueProperties Opd2PropInfo = OP_None) const override; +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(NVPTXTTI, TargetTransformInfo, "NVPTXtti", + "NVPTX Target Transform Info", true, true, false) +char NVPTXTTI::ID = 0; + +ImmutablePass * +llvm::createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM) { + return new NVPTXTTI(TM); +} + +bool NVPTXTTI::hasBranchDivergence() const { return true; } + +unsigned NVPTXTTI::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, + OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo) const { + // Legalize the type. + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + + switch (ISD) { + default: + return TargetTransformInfo::getArithmeticInstrCost( + Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + case ISD::ADD: + case ISD::MUL: + case ISD::XOR: + case ISD::OR: + case ISD::AND: + // The machine code (SASS) simulates an i64 with two i32. Therefore, we + // estimate that arithmetic operations on i64 are twice as expensive as + // those on types that can fit into one machine register. + if (LT.second.SimpleTy == MVT::i64) + return 2 * LT.first; + // Delegate other cases to the basic TTI. 
+ return TargetTransformInfo::getArithmeticInstrCost( + Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + } +} diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index a9fd190b7ff0..cf1feacba3f7 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -15,16 +15,16 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MutexGuard.h" #include <algorithm> #include <cstring> #include <map> #include <string> #include <vector> -#include "llvm/Support/ManagedStatic.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/Support/MutexGuard.h" using namespace llvm; @@ -52,7 +52,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { assert(prop && "Annotation property not a string"); // value - ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1)); + ConstantInt *Val = mdconst::dyn_extract<ConstantInt>(md->getOperand(i + 1)); assert(Val && "Value operand not a constant int"); std::string keyname = prop->getString().str(); @@ -75,7 +75,8 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { const MDNode *elem = NMD->getOperand(i); - Value *entity = elem->getOperand(0); + GlobalValue *entity = + mdconst::dyn_extract_or_null<GlobalValue>(elem->getOperand(0)); // entity may be null due to DCE if (!entity) continue; @@ -90,11 +91,11 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) { return; if ((*annotationCache).find(m) != (*annotationCache).end()) - (*annotationCache)[m][gv] = tmp; + (*annotationCache)[m][gv] = std::move(tmp); else { global_val_annot_t tmp1; - tmp1[gv] = tmp; - (*annotationCache)[m] = tmp1; + tmp1[gv] = std::move(tmp); + 
(*annotationCache)[m] = std::move(tmp1); } } @@ -322,7 +323,7 @@ bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) { if (MDNode *alignNode = I.getMetadata("callalign")) { for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) { if (const ConstantInt *CI = - dyn_cast<ConstantInt>(alignNode->getOperand(i))) { + mdconst::dyn_extract<ConstantInt>(alignNode->getOperand(i))) { unsigned v = CI->getZExtValue(); if ((v >> 16) == index) { align = v & 0xFFFF; diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h index 446bfa1e112c..7e2ce73daaa3 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.h +++ b/lib/Target/NVPTX/NVPTXUtilities.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef NVPTXUTILITIES_H -#define NVPTXUTILITIES_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" diff --git a/lib/Target/NVPTX/NVPTXVector.td b/lib/Target/NVPTX/NVPTXVector.td index 775df19be162..85aa34e9aea7 100644 --- a/lib/Target/NVPTX/NVPTXVector.td +++ b/lib/Target/NVPTX/NVPTXVector.td @@ -661,7 +661,7 @@ class ShuffleAsmStr4<string type> string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s); } -let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in { +let hasSideEffects=0, VecInstType=isVecShuffle.Value in { def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst), (ins V4F32Regs:$src1, V4F32Regs:$src2, i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), @@ -847,7 +847,7 @@ class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP> !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"), [], sop>; -let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1, +let isAsCheapAsAMove=1, hasSideEffects=0, IsSimpleMove=1, VecInstType=isVecOther.Value in { def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>; def V2f32Mov : Vec_Move<"mov.v2.f32", 
V2F32Regs, FMOV32rr>; diff --git a/lib/Target/NVPTX/NVPTXutil.h b/lib/Target/NVPTX/NVPTXutil.h index d1d117159486..1915dacf0f20 100644 --- a/lib/Target/NVPTX/NVPTXutil.h +++ b/lib/Target/NVPTX/NVPTXutil.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_NVPTX_UTIL_H -#define LLVM_TARGET_NVPTX_UTIL_H +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" |