summary | refs | log | tree | commit | diff
path: root/lib/Target/NVPTX
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2015-01-18 16:17:27 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2015-01-18 16:17:27 +0000
commit 67c32a98315f785a9ec9d531c1f571a0196c7463 (patch)
tree 4abb9cbeecc7901726dd0b4a37369596c852e9ef /lib/Target/NVPTX
parent 9f61947910e6ab40de38e6b4034751ef1513200f (diff)
Diffstat (limited to 'lib/Target/NVPTX')
-rw-r--r-- lib/Target/NVPTX/CMakeLists.txt 24
-rw-r--r-- lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h 4
-rw-r--r-- lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h 4
-rw-r--r-- lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp 5
-rw-r--r-- lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h 8
-rw-r--r-- lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h 4
-rw-r--r-- lib/Target/NVPTX/ManagedStringPool.h 4
-rw-r--r-- lib/Target/NVPTX/NVPTX.h 10
-rw-r--r-- lib/Target/NVPTX/NVPTXAllocaHoisting.h 6
-rw-r--r-- lib/Target/NVPTX/NVPTXAsmPrinter.cpp 229
-rw-r--r-- lib/Target/NVPTX/NVPTXAsmPrinter.h 30
-rw-r--r-- lib/Target/NVPTX/NVPTXFrameLowering.cpp 10
-rw-r--r-- lib/Target/NVPTX/NVPTXFrameLowering.h 4
-rw-r--r-- lib/Target/NVPTX/NVPTXGenericToNVVM.cpp 74
-rw-r--r-- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp 9
-rw-r--r-- lib/Target/NVPTX/NVPTXISelDAGToDAG.h 5
-rw-r--r-- lib/Target/NVPTX/NVPTXISelLowering.cpp 105
-rw-r--r-- lib/Target/NVPTX/NVPTXISelLowering.h 16
-rw-r--r-- lib/Target/NVPTX/NVPTXImageOptimizer.cpp 2
-rw-r--r-- lib/Target/NVPTX/NVPTXInstrInfo.cpp 2
-rw-r--r-- lib/Target/NVPTX/NVPTXInstrInfo.h 4
-rw-r--r-- lib/Target/NVPTX/NVPTXInstrInfo.td 10
-rw-r--r-- lib/Target/NVPTX/NVPTXLowerAggrCopies.h 4
-rw-r--r-- lib/Target/NVPTX/NVPTXLowerStructArgs.cpp 134
-rw-r--r-- lib/Target/NVPTX/NVPTXMCExpr.h 7
-rw-r--r-- lib/Target/NVPTX/NVPTXMachineFunctionInfo.h 5
-rw-r--r-- lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp 9
-rw-r--r-- lib/Target/NVPTX/NVPTXRegisterInfo.h 4
-rw-r--r-- lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp 6
-rw-r--r-- lib/Target/NVPTX/NVPTXSection.h 4
-rw-r--r-- lib/Target/NVPTX/NVPTXSubtarget.cpp 3
-rw-r--r-- lib/Target/NVPTX/NVPTXSubtarget.h 24
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetMachine.cpp 23
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetMachine.h 41
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetObjectFile.h 7
-rw-r--r-- lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp 115
-rw-r--r-- lib/Target/NVPTX/NVPTXUtilities.cpp 19
-rw-r--r-- lib/Target/NVPTX/NVPTXUtilities.h 4
-rw-r--r-- lib/Target/NVPTX/NVPTXVector.td 4
-rw-r--r-- lib/Target/NVPTX/NVPTXutil.h 4
40 files changed, 507 insertions, 479 deletions
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 4e35b1811295..3a4a19dc3991 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -9,26 +9,28 @@ tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(NVPTXCommonTableGen)
set(NVPTXCodeGen_sources
+ NVPTXAllocaHoisting.cpp
+ NVPTXAsmPrinter.cpp
+ NVPTXAssignValidGlobalNames.cpp
NVPTXFavorNonGenericAddrSpaces.cpp
NVPTXFrameLowering.cpp
- NVPTXInstrInfo.cpp
+ NVPTXGenericToNVVM.cpp
NVPTXISelDAGToDAG.cpp
NVPTXISelLowering.cpp
+ NVPTXImageOptimizer.cpp
+ NVPTXInstrInfo.cpp
+ NVPTXLowerAggrCopies.cpp
+ NVPTXLowerStructArgs.cpp
+ NVPTXMCExpr.cpp
+ NVPTXPrologEpilogPass.cpp
NVPTXRegisterInfo.cpp
+ NVPTXReplaceImageHandles.cpp
NVPTXSubtarget.cpp
NVPTXTargetMachine.cpp
- NVPTXLowerAggrCopies.cpp
- NVPTXutil.cpp
- NVPTXAllocaHoisting.cpp
- NVPTXAsmPrinter.cpp
+ NVPTXTargetTransformInfo.cpp
NVPTXUtilities.cpp
+ NVPTXutil.cpp
NVVMReflect.cpp
- NVPTXGenericToNVVM.cpp
- NVPTXAssignValidGlobalNames.cpp
- NVPTXPrologEpilogPass.cpp
- NVPTXMCExpr.cpp
- NVPTXReplaceImageHandles.cpp
- NVPTXImageOptimizer.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 1fb3c57390c2..04969642fd37 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_INST_PRINTER_H
-#define NVPTX_INST_PRINTER_H
+#ifndef LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
+#define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index 16ec19c25f16..a72ae2ef53a7 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXBASEINFO_H
-#define NVPTXBASEINFO_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H
+#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H
namespace llvm {
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 366341afe1b8..11d737ec187f 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -25,7 +25,7 @@ static cl::opt<bool> CompileForDebugging("debug-compile",
void NVPTXMCAsmInfo::anchor() {}
-NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) {
+NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
Triple TheTriple(TT);
if (TheTriple.getArch() == Triple::nvptx64) {
PointerSize = CalleeSaveStackSlotSize = 8;
@@ -33,8 +33,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) {
CommentString = "//";
- HasSetDirective = false;
-
HasSingleParameterDotFile = false;
InlineAsmStart = " inline asm";
@@ -52,5 +50,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) {
AscizDirective = " .b8";
// @TODO: Can we just disable this?
+ WeakDirective = "\t// .weak\t";
GlobalDirective = "\t// .globl\t";
}
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index 7d1633f60d2c..c3242866b177 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_MCASM_INFO_H
-#define NVPTX_MCASM_INFO_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCASMINFO_H
+#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCASMINFO_H
#include "llvm/MC/MCAsmInfo.h"
@@ -23,8 +23,8 @@ class StringRef;
class NVPTXMCAsmInfo : public MCAsmInfo {
virtual void anchor();
public:
- explicit NVPTXMCAsmInfo(const StringRef &TT);
+ explicit NVPTXMCAsmInfo(StringRef TT);
};
} // namespace llvm
-#endif // NVPTX_MCASM_INFO_H
+#endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index af95c76f92b2..98821d231378 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXMCTARGETDESC_H
-#define NVPTXMCTARGETDESC_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
+#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
namespace llvm {
class Target;
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index f9fb05922920..a2d670f8d39d 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_MANAGED_STRING_H
-#define LLVM_SUPPORT_MANAGED_STRING_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MANAGEDSTRINGPOOL_H
+#define LLVM_LIB_TARGET_NVPTX_MANAGEDSTRINGPOOL_H
#include "llvm/ADT/SmallVector.h"
#include <string>
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index e74c808f8554..a3382eb00003 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_NVPTX_H
-#define LLVM_TARGET_NVPTX_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTX_H
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/ADT/StringMap.h"
@@ -59,8 +59,9 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
llvm_unreachable("Unknown condition code");
}
-FunctionPass *
-createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
+ImmutablePass *createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM);
+FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
+ llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass();
@@ -69,6 +70,7 @@ ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
+FunctionPass *createNVPTXLowerStructArgsPass();
bool isImageOrSamplerVal(const Value *, const Module *);
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index 5b610687e391..69fc86e75414 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_ALLOCA_HOISTING_H_
-#define NVPTX_ALLOCA_HOISTING_H_
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/IR/DataLayout.h"
@@ -47,4 +47,4 @@ extern FunctionPass *createAllocaHoisting();
} // end namespace llvm
-#endif // NVPTX_ALLOCA_HOISTING_H_
+#endif
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 187b88c1d54a..beec9b22921d 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -17,8 +17,8 @@
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
-#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXMCExpr.h"
+#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
@@ -88,12 +88,9 @@ void VisitGlobalVariableForEmission(
return;
// Do we have a circular dependency?
- if (Visiting.count(GV))
+ if (!Visiting.insert(GV).second)
report_fatal_error("Circular dependency found in global variable set");
- // Start visiting this global
- Visiting.insert(GV);
-
// Make sure we visit all dependents first
DenseSet<const GlobalVariable *> Others;
for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
@@ -111,159 +108,6 @@ void VisitGlobalVariableForEmission(
}
}
-// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we
-// cannot just link to the existing version.
-/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
-///
-using namespace nvptx;
-const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
- MCContext &Ctx = AP.OutContext;
-
- if (CV->isNullValue() || isa<UndefValue>(CV))
- return MCConstantExpr::Create(0, Ctx);
-
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
- return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
-
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
- return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
-
- if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
- return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
-
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
- if (!CE)
- llvm_unreachable("Unknown constant value to lower!");
-
- switch (CE->getOpcode()) {
- default:
- // If the code isn't optimized, there may be outstanding folding
- // opportunities. Attempt to fold the expression using DataLayout as a
- // last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
- if (C != CE)
- return LowerConstant(C, AP);
-
- // Otherwise report the problem to the user.
- {
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/ false,
- !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
- report_fatal_error(OS.str());
- }
- case Instruction::AddrSpaceCast: {
- // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be
- // handled by the generic() logic in the MCExpr printer
- PointerType *DstTy = cast<PointerType>(CE->getType());
- PointerType *SrcTy = cast<PointerType>(CE->getOperand(0)->getType());
- if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) {
- return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP);
- }
- std::string S;
- raw_string_ostream OS(S);
- OS << "Unsupported expression in static initializer: ";
- CE->printAsOperand(OS, /*PrintType=*/ false,
- !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
- report_fatal_error(OS.str());
- }
- case Instruction::GetElementPtr: {
- const DataLayout &TD = *AP.TM.getDataLayout();
- // Generate a symbolic expression for the byte address
- APInt OffsetAI(TD.getPointerSizeInBits(), 0);
- cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
-
- const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
- if (!OffsetAI)
- return Base;
-
- int64_t Offset = OffsetAI.getSExtValue();
- return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
- Ctx);
- }
-
- case Instruction::Trunc:
- // We emit the value and depend on the assembler to truncate the generated
- // expression properly. This is important for differences between
- // blockaddress labels. Since the two labels are in the same function, it
- // is reasonable to treat their delta as a 32-bit value.
- // FALL THROUGH.
- case Instruction::BitCast:
- return LowerConstant(CE->getOperand(0), AP);
-
- case Instruction::IntToPtr: {
- const DataLayout &TD = *AP.TM.getDataLayout();
- // Handle casts to pointers by changing them into casts to the appropriate
- // integer type. This promotes constant folding and simplifies this code.
- Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
- false /*ZExt*/);
- return LowerConstant(Op, AP);
- }
-
- case Instruction::PtrToInt: {
- const DataLayout &TD = *AP.TM.getDataLayout();
- // Support only foldable casts to/from pointers that can be eliminated by
- // changing the pointer to the appropriately sized integer type.
- Constant *Op = CE->getOperand(0);
- Type *Ty = CE->getType();
-
- const MCExpr *OpExpr = LowerConstant(Op, AP);
-
- // We can emit the pointer value into this slot if the slot is an
- // integer slot equal to the size of the pointer.
- if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
- return OpExpr;
-
- // Otherwise the pointer is smaller than the resultant integer, mask off
- // the high bits so we are sure to get a proper truncation if the input is
- // a constant expr.
- unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
- const MCExpr *MaskExpr =
- MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
- return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
- }
-
- // The MC library also has a right-shift operator, but it isn't consistently
- // signed or unsigned between different targets.
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::Shl:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
- const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
- switch (CE->getOpcode()) {
- default:
- llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add:
- return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
- case Instruction::Sub:
- return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
- case Instruction::Mul:
- return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
- case Instruction::SDiv:
- return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
- case Instruction::SRem:
- return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
- case Instruction::Shl:
- return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
- case Instruction::And:
- return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
- case Instruction::Or:
- return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
- case Instruction::Xor:
- return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
- }
- }
- }
-}
-
void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
if (!EmitLineNumbers)
return;
@@ -502,8 +346,8 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
}
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
- const TargetLowering *TLI = TM.getTargetLowering();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
Type *Ty = F->getReturnType();
@@ -530,17 +374,15 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
} else if (isa<PointerType>(Ty)) {
O << ".param .b" << TLI->getPointerTy().getSizeInBits()
<< " func_retval0";
- } else {
- if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
- unsigned totalsz = TD->getTypeAllocSize(Ty);
- unsigned retAlignment = 0;
- if (!llvm::getAlign(*F, 0, retAlignment))
- retAlignment = TD->getABITypeAlignment(Ty);
- O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
- << "]";
- } else
- assert(false && "Unknown return type");
- }
+ } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
+ unsigned totalsz = TD->getTypeAllocSize(Ty);
+ unsigned retAlignment = 0;
+ if (!llvm::getAlign(*F, 0, retAlignment))
+ retAlignment = TD->getABITypeAlignment(Ty);
+ O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
+ << "]";
+ } else
+ llvm_unreachable("Unknown return type");
} else {
SmallVector<EVT, 16> vtparts;
ComputeValueVTs(*TLI, Ty, vtparts);
@@ -626,13 +468,14 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
if (TRI->isVirtualRegister(RegNo)) {
OutStreamer.AddComment(Twine("implicit-def: ") +
getVirtualRegisterName(RegNo));
} else {
- OutStreamer.AddComment(Twine("implicit-def: ") +
- TM.getRegisterInfo()->getName(RegNo));
+ OutStreamer.AddComment(
+ Twine("implicit-def: ") +
+ TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
}
OutStreamer.AddBlankLine();
}
@@ -794,11 +637,6 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
return false;
}
- if (const MDNode *md = dyn_cast<MDNode>(U))
- if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
- (md->getName().str() == "llvm.dbg.sp")))
- return true;
-
for (const User *UU : U->users())
if (usedInOneFunc(UU, oneFunc) == false)
return false;
@@ -953,7 +791,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(TM.getDataLayout());
+ Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
// Emit header before any dwarf directives are emitted below.
emitHeader(M, OS1);
@@ -1154,7 +992,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
GVar->getName().startswith("nvvm."))
return;
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
// GlobalVariables are always constant pointers themselves.
const PointerType *PTy = GVar->getType();
@@ -1288,7 +1126,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
else
O << " .align " << GVar->getAlignment();
- if (ETy->isSingleValueType()) {
+ if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
O << " .";
// Special case: ABI requires that we use .u8 for predicates
if (ETy->isIntegerTy(1))
@@ -1457,7 +1295,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
// GlobalVariables are always constant pointers themselves.
const PointerType *PTy = GVar->getType();
@@ -1470,7 +1308,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
else
O << " .align " << GVar->getAlignment();
- if (ETy->isSingleValueType()) {
+ if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
O << " .";
O << getPTXFundamentalTypeStr(ETy);
O << " ";
@@ -1509,17 +1347,6 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
if (ATy)
return getOpenCLAlignment(TD, ATy->getElementType());
- const VectorType *VTy = dyn_cast<VectorType>(Ty);
- if (VTy) {
- Type *ETy = VTy->getElementType();
- unsigned int numE = VTy->getNumElements();
- unsigned int alignE = TD->getPrefTypeAlignment(ETy);
- if (numE == 3)
- return 4 * alignE;
- else
- return numE * alignE;
- }
-
const StructType *STy = dyn_cast<StructType>(Ty);
if (STy) {
unsigned int alignStruct = 1;
@@ -1578,9 +1405,9 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
}
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = TM.getTargetLowering();
+ const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
@@ -1771,7 +1598,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
// Map the global virtual register number to a register class specific
// virtual register number starting from 1 with that class.
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
//unsigned numRegClasses = TRI->getNumRegClasses();
// Emit the Fake Stack Object
@@ -1901,7 +1728,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
}
return;
} else {
- O << *LowerConstant(CPV, *this);
+ O << *lowerConstant(CPV);
return;
}
}
@@ -1911,7 +1738,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
int s = TD->getTypeAllocSize(CPV->getType());
@@ -2035,7 +1862,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
int Bytes;
// Old constants
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index a9f9bdd6d3d8..c11b5793b22a 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXASMPRINTER_H
-#define NVPTXASMPRINTER_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H
#include "NVPTX.h"
#include "NVPTXSubtarget.h"
@@ -39,13 +39,6 @@
// A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer
// (subclass of MCStreamer).
-// This is defined in AsmPrinter.cpp.
-// Used to process the constant expressions in initializers.
-namespace nvptx {
-const llvm::MCExpr *
-LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP);
-}
-
namespace llvm {
class LineReader {
@@ -86,13 +79,13 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
// Once we have this AggBuffer setup, we can choose how to print
// it out.
public:
- unsigned size; // size of the buffer in bytes
- unsigned char *buffer; // the buffer
unsigned numSymbols; // number of symbol addresses
- SmallVector<unsigned, 4> symbolPosInBuffer;
- SmallVector<const Value *, 4> Symbols;
private:
+ const unsigned size; // size of the buffer in bytes
+ std::vector<unsigned char> buffer; // the buffer
+ SmallVector<unsigned, 4> symbolPosInBuffer;
+ SmallVector<const Value *, 4> Symbols;
unsigned curpos;
raw_ostream &O;
NVPTXAsmPrinter &AP;
@@ -100,14 +93,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
public:
AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
- : O(_O), AP(_AP) {
- buffer = new unsigned char[_size];
- size = _size;
+ : size(_size), buffer(_size), O(_O), AP(_AP) {
curpos = 0;
numSymbols = 0;
EmitGeneric = AP.EmitGeneric;
}
- ~AggBuffer() { delete[] buffer; }
unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
assert((curpos + Num) <= size);
assert((curpos + Bytes) <= size);
@@ -170,7 +160,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
O << *Name;
}
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
- O << *nvptx::LowerConstant(Cexpr, AP);
+ O << *AP.lowerConstant(Cexpr);
} else
llvm_unreachable("symbol type unknown");
nSym++;
@@ -179,9 +169,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
else
nextSymbolPos = symbolPosInBuffer[nSym];
} else if (nBytes == 4)
- O << *(unsigned int *)(buffer + pos);
+ O << *(unsigned int *)(&buffer[pos]);
else
- O << *(unsigned long long *)(buffer + pos);
+ O << *(unsigned long long *)(&buffer[pos]);
}
}
}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 8b088412dbba..314df3828b88 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -48,20 +48,20 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
if (is64bit) {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
MachineInstr *MI =
- BuildMI(MBB, MBBI, dl,
- MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+ BuildMI(MBB, MBBI, dl, MF.getSubtarget().getInstrInfo()->get(
+ NVPTX::cvta_local_yes_64),
NVPTX::VRFrame).addReg(LocalReg);
BuildMI(MBB, MI, dl,
- MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
+ MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
LocalReg).addImm(MF.getFunctionNumber());
} else {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass);
MachineInstr *MI =
BuildMI(MBB, MBBI, dl,
- MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes),
+ MF.getSubtarget().getInstrInfo()->get(NVPTX::cvta_local_yes),
NVPTX::VRFrame).addReg(LocalReg);
BuildMI(MBB, MI, dl,
- MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
+ MF.getSubtarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
LocalReg).addImm(MF.getFunctionNumber());
}
}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 56fb673de0eb..0846b78d58e5 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_FRAMELOWERING_H
-#define NVPTX_FRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXFRAMELOWERING_H
#include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index faa9fdb424b6..5f06d9a791a1 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/PassManager.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
@@ -54,8 +55,7 @@ private:
IRBuilder<> &Builder);
Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
IRBuilder<> &Builder);
- void remapNamedMDNode(Module *M, NamedMDNode *N);
- MDNode *remapMDNode(Module *M, MDNode *N);
+ void remapNamedMDNode(ValueToValueMapTy &VM, NamedMDNode *N);
typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy;
typedef ValueMap<Constant *, Value *> ConstantToValueMapTy;
@@ -125,12 +125,17 @@ bool GenericToNVVM::runOnModule(Module &M) {
ConstantToValueMap.clear();
}
+ // Copy GVMap over to a standard value map.
+ ValueToValueMapTy VM;
+ for (auto I = GVMap.begin(), E = GVMap.end(); I != E; ++I)
+ VM[I->first] = I->second;
+
// Walk through the metadata section and update the debug information
// associated with the global variables in the default address space.
for (Module::named_metadata_iterator I = M.named_metadata_begin(),
E = M.named_metadata_end();
I != E; I++) {
- remapNamedMDNode(&M, I);
+ remapNamedMDNode(VM, I);
}
// Walk through the global variable initializers, and replace any use of
@@ -140,20 +145,23 @@ bool GenericToNVVM::runOnModule(Module &M) {
for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) {
GlobalVariable *GV = I->first;
GlobalVariable *NewGV = I->second;
- ++I;
+
+ // Remove GV from the map so that it can be RAUWed. Note that
+ // DenseMap::erase() won't invalidate any iterators but this one.
+ auto Next = std::next(I);
+ GVMap.erase(I);
+ I = Next;
+
Constant *BitCastNewGV = ConstantExpr::getPointerCast(NewGV, GV->getType());
// At this point, the remaining uses of GV should be found only in global
// variable initializers, as other uses have been already been removed
// while walking through the instructions in function definitions.
- for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end();
- UI != UE;)
- (UI++)->set(BitCastNewGV);
+ GV->replaceAllUsesWith(BitCastNewGV);
std::string Name = GV->getName();
- GV->removeDeadConstantUsers();
GV->eraseFromParent();
NewGV->setName(Name);
}
- GVMap.clear();
+ assert(GVMap.empty() && "Expected it to be empty by now");
return true;
}
@@ -359,7 +367,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
}
}
-void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
+void GenericToNVVM::remapNamedMDNode(ValueToValueMapTy &VM, NamedMDNode *N) {
bool OperandChanged = false;
SmallVector<MDNode *, 16> NewOperands;
@@ -369,7 +377,7 @@ void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
// converted to another value.
for (unsigned i = 0; i < NumOperands; ++i) {
MDNode *Operand = N->getOperand(i);
- MDNode *NewOperand = remapMDNode(M, Operand);
+ MDNode *NewOperand = MapMetadata(Operand, VM);
OperandChanged |= Operand != NewOperand;
NewOperands.push_back(NewOperand);
}
@@ -387,47 +395,3 @@ void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
N->addOperand(*I);
}
}
-
-MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) {
-
- bool OperandChanged = false;
- SmallVector<Value *, 8> NewOperands;
- unsigned NumOperands = N->getNumOperands();
-
- // Check if any operand is or contains a global variable in GVMap, and thus
- // converted to another value.
- for (unsigned i = 0; i < NumOperands; ++i) {
- Value *Operand = N->getOperand(i);
- Value *NewOperand = Operand;
- if (Operand) {
- if (isa<GlobalVariable>(Operand)) {
- GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(Operand));
- if (I != GVMap.end()) {
- NewOperand = I->second;
- if (++i < NumOperands) {
- NewOperands.push_back(NewOperand);
- // Address space of the global variable follows the global variable
- // in the global variable debug info (see createGlobalVariable in
- // lib/Analysis/DIBuilder.cpp).
- NewOperand =
- ConstantInt::get(Type::getInt32Ty(M->getContext()),
- I->second->getType()->getAddressSpace());
- }
- }
- } else if (isa<MDNode>(Operand)) {
- NewOperand = remapMDNode(M, cast<MDNode>(Operand));
- }
- }
- OperandChanged |= Operand != NewOperand;
- NewOperands.push_back(NewOperand);
- }
-
- // If none of the operands has been modified, return N as it is.
- if (!OperandChanged) {
- return N;
- }
-
- // If any of the operands has been modified, create a new MDNode with the new
- // operands.
- return MDNode::get(M->getContext(), makeArrayRef(NewOperands));
-}
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 05205fba1aff..cd0422d78a8c 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
static cl::opt<int> UsePrecDivF32(
"nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
- " IEEE Compliant F32 div.rnd if avaiable."),
+ " IEEE Compliant F32 div.rnd if available."),
cl::init(2));
static cl::opt<bool>
@@ -5041,17 +5041,10 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
unsigned int spN) const {
const Value *Src = nullptr;
- // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
- // the classof() for MemSDNode does not include MemIntrinsicSDNode
- // (See SelectionDAGNodes.h). So we need to check for both.
if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
if (spN == 0 && mN->getMemOperand()->getPseudoValue())
return true;
Src = mN->getMemOperand()->getValue();
- } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
- if (spN == 0 && mN->getMemOperand()->getPseudoValue())
- return true;
- Src = mN->getMemOperand()->getValue();
}
if (!Src)
return false;
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index c62fc253c33d..69afcd7320d3 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELDAGTODAG_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXISELDAGTODAG_H
+
#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
@@ -92,3 +95,5 @@ private:
};
}
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index d76b20a29eb7..093ba1a2b824 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -106,8 +106,8 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
}
// NVPTXTargetLowering Constructor.
-NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
- : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
+NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
+ : TargetLowering(TM), nvTM(&TM),
nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
// always lower memset, memcpy, and memmove intrinsics to load/store
@@ -203,8 +203,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
// Turn FP extload into load/fextend
- setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
@@ -214,12 +215,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setOperationAction(ISD::LOAD, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setTruncStoreAction(MVT::i64, MVT::i1, Expand);
- setTruncStoreAction(MVT::i32, MVT::i1, Expand);
- setTruncStoreAction(MVT::i16, MVT::i1, Expand);
- setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setTruncStoreAction(VT, MVT::i1, Expand);
+ }
// This is legal in NVPTX
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
@@ -232,9 +232,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setOperationAction(ISD::ADDE, MVT::i64, Expand);
// Register custom handling for vector loads/stores
- for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
- ++i) {
- MVT VT = (MVT::SimpleValueType) i;
+ for (MVT VT : MVT::vector_valuetypes()) {
if (IsPTXVectorType(VT)) {
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
@@ -905,16 +903,14 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy)) {
O << ".param .b" << getPointerTy().getSizeInBits() << " _";
+ } else if ((retTy->getTypeID() == Type::StructTyID) ||
+ isa<VectorType>(retTy)) {
+ O << ".param .align "
+ << retAlignment
+ << " .b8 _["
+ << getDataLayout()->getTypeAllocSize(retTy) << "]";
} else {
- if((retTy->getTypeID() == Type::StructTyID) ||
- isa<VectorType>(retTy)) {
- O << ".param .align "
- << retAlignment
- << " .b8 _["
- << getDataLayout()->getTypeAllocSize(retTy) << "]";
- } else {
- assert(false && "Unknown return type");
- }
+ llvm_unreachable("Unknown return type");
}
O << ") ";
}
@@ -1355,7 +1351,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// .param .align 16 .b8 retval0[<size-in-bytes>], or
// .param .b<size-in-bits> retval0
unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
- if (retTy->isSingleValueType()) {
+ // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
+ // these three types to match the logic in
+ // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
+ // Plus, this behavior is consistent with nvcc's.
+ if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
+ retTy->isPointerTy()) {
// Scalar needs to be at least 32bit wide
if (resultsz < 32)
resultsz = 32;
@@ -1451,8 +1452,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT ObjectVT = getValueType(retTy);
unsigned NumElts = ObjectVT.getVectorNumElements();
EVT EltVT = ObjectVT.getVectorElementType();
- assert(nvTM->getTargetLowering()->getNumRegisters(F->getContext(),
- ObjectVT) == NumElts &&
+ assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters(
+ F->getContext(), ObjectVT) == NumElts &&
"Vector was not scalarized");
unsigned sz = EltVT.getSizeInBits();
bool needTruncate = sz < 8 ? true : false;
@@ -2028,7 +2029,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
const Function *F = MF.getFunction();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = DAG.getTarget().getTargetLowering();
+ const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering();
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
@@ -2142,7 +2143,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
ISD::SEXTLOAD : ISD::ZEXTLOAD;
p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
MachinePointerInfo(srcValue), partVT, false,
- false, partAlign);
+ false, false, partAlign);
} else {
p = DAG.getLoad(partVT, dl, Root, srcAddr,
MachinePointerInfo(srcValue), false, false, false,
@@ -2163,7 +2164,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
unsigned NumElts = ObjectVT.getVectorNumElements();
assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
"Vector was not scalarized");
- unsigned Ofst = 0;
EVT EltVT = ObjectVT.getVectorElementType();
// V1 load
@@ -2172,10 +2172,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// We only have one element, so just directly load it
Value *SrcValue = Constant::getNullValue(PointerType::get(
EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
- SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, getPointerTy()));
SDValue P = DAG.getLoad(
- EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
+ EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
false, true,
TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
if (P.getNode())
@@ -2184,7 +2182,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
InVals.push_back(P);
- Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext()));
++InsIdx;
} else if (NumElts == 2) {
// V2 load
@@ -2192,10 +2189,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
Value *SrcValue = Constant::getNullValue(PointerType::get(
VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
- SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, getPointerTy()));
SDValue P = DAG.getLoad(
- VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
+ VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
false, true,
TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
if (P.getNode())
@@ -2213,7 +2208,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
InVals.push_back(Elt0);
InVals.push_back(Elt1);
- Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
InsIdx += 2;
} else {
// V4 loads
@@ -2231,6 +2225,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
VecSize = 2;
}
EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
+ unsigned Ofst = 0;
for (unsigned i = 0; i < NumElts; i += VecSize) {
Value *SrcValue = Constant::getNullValue(
PointerType::get(VecVT.getTypeForEVT(F->getContext()),
@@ -2275,6 +2270,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
ISD::SEXTLOAD : ISD::ZEXTLOAD;
p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
MachinePointerInfo(srcValue), ObjectVT, false, false,
+ false,
TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
} else {
p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
@@ -3269,16 +3265,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
-
- // alignment is available as metadata.
- // Grab it and set the alignment.
- assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
- MDNode *AlignMD = I.getMetadata("align");
- assert(AlignMD && "Must have a non-null MDNode");
- assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
- Value *Align = AlignMD->getOperand(0);
- int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
- Info.align = Alignment;
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
return true;
}
@@ -3298,16 +3285,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
-
- // alignment is available as metadata.
- // Grab it and set the alignment.
- assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
- MDNode *AlignMD = I.getMetadata("align");
- assert(AlignMD && "Must have a non-null MDNode");
- assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
- Value *Align = AlignMD->getOperand(0);
- int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
- Info.align = Alignment;
+ Info.align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
return true;
}
@@ -3866,8 +3844,8 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
}
else if (N0.getOpcode() == ISD::FMUL) {
if (VT == MVT::f32 || VT == MVT::f64) {
- NVPTXTargetLowering *TLI =
- (NVPTXTargetLowering *)&DAG.getTargetLoweringInfo();
+ const auto *TLI = static_cast<const NVPTXTargetLowering *>(
+ &DAG.getTargetLoweringInfo());
if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel))
return SDValue();
@@ -4053,13 +4031,13 @@ static bool IsMulWideOperandDemotable(SDValue Op,
if (Op.getOpcode() == ISD::SIGN_EXTEND ||
Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getSizeInBits() == OptSize) {
+ if (OrigVT.getSizeInBits() <= OptSize) {
S = Signed;
return true;
}
} else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
EVT OrigVT = Op.getOperand(0).getValueType();
- if (OrigVT.getSizeInBits() == OptSize) {
+ if (OrigVT.getSizeInBits() <= OptSize) {
S = Unsigned;
return true;
}
@@ -4514,3 +4492,10 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
delete DwarfRangesSection;
delete DwarfMacroInfoSection;
}
+
+const MCSection *
+NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ return getDataSection();
+}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index bef6ed9faad6..b3fea3f4a36a 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXISELLOWERING_H
-#define NVPTXISELLOWERING_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
#include "NVPTX.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -436,7 +436,7 @@ class NVPTXSubtarget;
//===--------------------------------------------------------------------===//
class NVPTXTargetLowering : public TargetLowering {
public:
- explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);
+ explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -495,7 +495,7 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- NVPTXTargetMachine *nvTM;
+ const NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
@@ -505,9 +505,9 @@ public:
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
- virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
- return true;
- }
+ bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
+
+ bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
private:
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
@@ -538,4 +538,4 @@ private:
};
} // namespace llvm
-#endif // NVPTXISELLOWERING_H
+#endif
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index a98fb37f6e25..aa36b6be7250 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -16,11 +16,11 @@
#include "NVPTX.h"
#include "NVPTXUtilities.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index b5b4fbed0799..740ca0328efe 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -14,11 +14,11 @@
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
#include "NVPTXTargetMachine.h"
-#include "llvm/IR/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 2ac29748676a..6de75364a823 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXINSTRUCTIONINFO_H
-#define NVPTXINSTRUCTIONINFO_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXINSTRINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXINSTRINFO_H
#include "NVPTX.h"
#include "NVPTXRegisterInfo.h"
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 9900b8c8433f..2c571c4878a8 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -296,7 +296,7 @@ multiclass F2<string OpcStr, SDNode OpNode> {
// General Type Conversion
//-----------------------------------
-let neverHasSideEffects = 1 in {
+let hasSideEffects = 0 in {
// Generate a cvt to the given type from all possible types.
// Each instance takes a CvtMode immediate that defines the conversion mode to
// use. It can be CvtNONE to omit a conversion mode.
@@ -2094,7 +2094,7 @@ multiclass LD<NVPTXRegClass regclass> {
"$fromWidth \t$dst, [$addr+$offset];"), []>;
}
-let mayLoad=1, neverHasSideEffects=1 in {
+let mayLoad=1, hasSideEffects=0 in {
defm LD_i8 : LD<Int16Regs>;
defm LD_i16 : LD<Int16Regs>;
defm LD_i32 : LD<Int32Regs>;
@@ -2136,7 +2136,7 @@ multiclass ST<NVPTXRegClass regclass> {
" \t[$addr+$offset], $src;"), []>;
}
-let mayStore=1, neverHasSideEffects=1 in {
+let mayStore=1, hasSideEffects=0 in {
defm ST_i8 : ST<Int16Regs>;
defm ST_i16 : ST<Int16Regs>;
defm ST_i32 : ST<Int32Regs>;
@@ -2220,7 +2220,7 @@ multiclass LD_VEC<NVPTXRegClass regclass> {
"$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
[]>;
}
-let mayLoad=1, neverHasSideEffects=1 in {
+let mayLoad=1, hasSideEffects=0 in {
defm LDV_i8 : LD_VEC<Int16Regs>;
defm LDV_i16 : LD_VEC<Int16Regs>;
defm LDV_i32 : LD_VEC<Int32Regs>;
@@ -2303,7 +2303,7 @@ multiclass ST_VEC<NVPTXRegClass regclass> {
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
[]>;
}
-let mayStore=1, neverHasSideEffects=1 in {
+let mayStore=1, hasSideEffects=0 in {
defm STV_i8 : ST_VEC<Int16Regs>;
defm STV_i16 : ST_VEC<Int16Regs>;
defm STV_i32 : ST_VEC<Int32Regs>;
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index 5ec1fc969687..8759406a6803 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_LOWER_AGGR_COPIES_H
-#define NVPTX_LOWER_AGGR_COPIES_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/IR/DataLayout.h"
diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
new file mode 100644
index 000000000000..3149399afb30
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
@@ -0,0 +1,134 @@
+//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory =====--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Copy struct args to local memory. This is needed for kernel functions only.
+// This is a preparation for handling cases like
+//
+// kernel void foo(struct A arg, ...)
+// {
+// struct A *p = &arg;
+// ...
+// ... = p->field1 ... (there is no generic address for .param)
+// p->field2 = ... (there is no write access to .param)
+// }
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVPTXLowerStructArgsPass(PassRegistry &);
+}
+
+class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass {
+ bool runOnFunction(Function &F) override;
+
+ void handleStructPtrArgs(Function &);
+ void handleParam(Argument *);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ NVPTXLowerStructArgs() : FunctionPass(ID) {}
+ const char *getPassName() const override {
+ return "Copy structure (byval *) arguments to stack";
+ }
+};
+
+char NVPTXLowerStructArgs::ID = 1;
+
+INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args",
+ "Lower structure arguments (NVPTX)", false, false)
+
+void NVPTXLowerStructArgs::handleParam(Argument *Arg) {
+ Function *Func = Arg->getParent();
+ Instruction *FirstInst = &(Func->getEntryBlock().front());
+ PointerType *PType = dyn_cast<PointerType>(Arg->getType());
+
+ assert(PType && "Expecting pointer type in handleParam");
+
+ Type *StructType = PType->getElementType();
+ AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
+
+ /* Set the alignment to alignment of the byval parameter. This is because,
+ * later load/stores assume that alignment, and we are going to replace
+ * the use of the byval parameter with this alloca instruction.
+ */
+ AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
+
+ Arg->replaceAllUsesWith(AllocA);
+
+ // Get the cvt.gen.to.param intrinsic
+ Type *CvtTypes[] = {
+ Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM),
+ Type::getInt8PtrTy(Func->getParent()->getContext(),
+ ADDRESS_SPACE_GENERIC)};
+ Function *CvtFunc = Intrinsic::getDeclaration(
+ Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param, CvtTypes);
+
+ Value *BitcastArgs[] = {
+ new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(),
+ ADDRESS_SPACE_GENERIC),
+ Arg->getName(), FirstInst)};
+ CallInst *CallCVT =
+ CallInst::Create(CvtFunc, BitcastArgs, "cvt_to_param", FirstInst);
+
+ BitCastInst *BitCast = new BitCastInst(
+ CallCVT, PointerType::get(StructType, ADDRESS_SPACE_PARAM),
+ Arg->getName(), FirstInst);
+ LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst);
+ new StoreInst(LI, AllocA, FirstInst);
+}
+
+// =============================================================================
+// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then
+// add the following instructions to the first basic block :
+//
+// %temp = alloca %struct.x, align 8
+// %tt1 = bitcast %struct.x * %d to i8 *
+// %tt2 = llvm.nvvm.ptr.gen.to.param %tt1
+// %tempd = bitcast i8 addrspace(101) * %tt2 to %struct.x addrspace(101) *
+// %tv = load %struct.x addrspace(101) * %tempd
+// store %struct.x %tv, %struct.x * %temp, align 8
+//
+// The above code allocates some space in the stack and copies the incoming
+// struct from param space to local space.
+// Then replace all occurrences of %d by %temp.
+// =============================================================================
+void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) {
+ for (Argument &Arg : F.args()) {
+ if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
+ handleParam(&Arg);
+ }
+ }
+}
+
+// =============================================================================
+// Main function for this pass.
+// =============================================================================
+bool NVPTXLowerStructArgs::runOnFunction(Function &F) {
+ // Skip non-kernels. See the comments at the top of this file.
+ if (!isKernelFunction(F))
+ return false;
+
+ handleStructPtrArgs(F);
+ return true;
+}
+
+FunctionPass *llvm::createNVPTXLowerStructArgsPass() {
+ return new NVPTXLowerStructArgs();
+}
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index 554764930a9e..d39a394fe750 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -9,8 +9,8 @@
// Modeled after ARMMCExpr
-#ifndef NVPTXMCEXPR_H
-#define NVPTXMCEXPR_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMCEXPR_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXMCEXPR_H
#include "llvm/ADT/APFloat.h"
#include "llvm/MC/MCExpr.h"
@@ -63,7 +63,8 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const override {
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override {
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 67fb39050797..10f1135ad841 100644
--- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -12,6 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXMACHINEFUNCTIONINFO_H
+
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -44,3 +47,5 @@ public:
}
};
}
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 348ab0c4bf14..a1e1b9e74480 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -22,6 +22,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -48,8 +49,8 @@ char NVPTXPrologEpilogPass::ID = 0;
bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering &TFI = *TM.getFrameLowering();
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+ const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
+ const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo();
bool Modified = false;
calculateFrameObjectOffsets(MF);
@@ -108,8 +109,8 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
void
NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
- const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
bool StackGrowsDown =
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index a7594be121a0..d2e67331f788 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXREGISTERINFO_H
-#define NVPTXREGISTERINFO_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXREGISTERINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXREGISTERINFO_H
#include "ManagedStringPool.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 20d4e272341e..b7f53c7929d1 100644
--- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -16,11 +16,11 @@
#include "NVPTX.h"
#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXSubtarget.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
using namespace llvm;
@@ -33,9 +33,9 @@ private:
public:
NVPTXReplaceImageHandles();
- bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "NVPTX Replace Image Handles";
}
private:
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index aa0436bf0da7..f1d3cb4da51b 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_NVPTXSECTION_H
-#define LLVM_NVPTXSECTION_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSECTION_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXSECTION_H
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCSection.h"
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index d5cded218362..3d52532310ff 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -59,7 +59,8 @@ NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
: NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
SmVersion(20), DL(computeDataLayout(is64Bit)),
InstrInfo(initializeSubtargetDependencies(CPU, FS)),
- TLInfo((NVPTXTargetMachine &)TM), TSInfo(&DL), FrameLowering(*this) {
+ TLInfo((const NVPTXTargetMachine &)TM), TSInfo(&DL),
+ FrameLowering(*this) {
Triple T(TT);
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index 4c41e4e470dd..fb2d4047631a 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXSUBTARGET_H
-#define NVPTXSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXSUBTARGET_H
#include "NVPTX.h"
#include "NVPTXFrameLowering.h"
@@ -57,14 +57,20 @@ public:
NVPTXSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM, bool is64Bit);
- const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
- const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
- const DataLayout *getDataLayout() const { return &DL; }
- const NVPTXRegisterInfo *getRegisterInfo() const {
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const NVPTXRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- const NVPTXTargetLowering *getTargetLowering() const { return &TLInfo; }
- const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const NVPTXTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
bool hasBrkPt() const { return SmVersion >= 11; }
bool hasAtomRedG32() const { return SmVersion >= 11; }
@@ -113,4 +119,4 @@ public:
} // End llvm namespace
-#endif // NVPTXSUBTARGET_H
+#endif
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 069a1b9966f0..c7f95071b9fc 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -16,6 +16,7 @@
#include "NVPTX.h"
#include "NVPTXAllocaHoisting.h"
#include "NVPTXLowerAggrCopies.h"
+#include "NVPTXTargetObjectFile.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -50,6 +51,7 @@ void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+void initializeNVPTXLowerStructArgsPass(PassRegistry &);
}
extern "C" void LLVMInitializeNVPTXTarget() {
@@ -64,6 +66,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
initializeNVPTXFavorNonGenericAddrSpacesPass(
*PassRegistry::getPassRegistry());
+ initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
}
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
@@ -72,10 +75,13 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<NVPTXTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
}
+NVPTXTargetMachine::~NVPTXTargetMachine() {}
+
void NVPTXTargetMachine32::anchor() {}
NVPTXTargetMachine32::NVPTXTargetMachine32(
@@ -104,8 +110,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
- bool addPreRegAlloc() override;
- bool addPostRegAlloc() override;
+ void addPostRegAlloc() override;
void addMachineSSAOptimization() override;
FunctionPass *createTargetRegisterAllocator(bool) override;
@@ -119,6 +124,14 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
return PassConfig;
}
+void NVPTXTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our NVPTX pass. This
+ // allows the NVPTX pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(this));
+ PM.add(createNVPTXTargetTransformInfoPass(this));
+}
+
void NVPTXPassConfig::addIRPasses() {
// The following passes are known to not play well with virtual regs hanging
// around after register allocation (which in our case, is *all* registers).
@@ -169,10 +182,8 @@ bool NVPTXPassConfig::addInstSelector() {
return false;
}
-bool NVPTXPassConfig::addPreRegAlloc() { return false; }
-bool NVPTXPassConfig::addPostRegAlloc() {
- addPass(createNVPTXPrologEpilogPass());
- return false;
+void NVPTXPassConfig::addPostRegAlloc() {
+ addPass(createNVPTXPrologEpilogPass(), false);
}
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index a7a1c8f4e171..fa97ec8dfe2d 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTX_TARGETMACHINE_H
-#define NVPTX_TARGETMACHINE_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETMACHINE_H
-#include "NVPTXSubtarget.h"
#include "ManagedStringPool.h"
+#include "NVPTXSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
@@ -25,6 +25,7 @@ namespace llvm {
/// NVPTXTargetMachine
///
class NVPTXTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
NVPTXSubtarget Subtarget;
// Hold Strings that can be free'd all together with NVPTXTargetMachine
@@ -35,27 +36,9 @@ public:
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
- const TargetFrameLowering *getFrameLowering() const override {
- return getSubtargetImpl()->getFrameLowering();
- }
- const NVPTXInstrInfo *getInstrInfo() const override {
- return getSubtargetImpl()->getInstrInfo();
- }
- const DataLayout *getDataLayout() const override {
- return getSubtargetImpl()->getDataLayout();
- }
- const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- const NVPTXRegisterInfo *getRegisterInfo() const override {
- return getSubtargetImpl()->getRegisterInfo();
- }
-
- const NVPTXTargetLowering *getTargetLowering() const override {
- return getSubtargetImpl()->getTargetLowering();
- }
+ ~NVPTXTargetMachine() override;
- const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
- return getSubtargetImpl()->getSelectionDAGInfo();
- }
+ const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
ManagedStringPool *getManagedStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
@@ -63,17 +46,17 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- // Emission of machine code through JITCodeEmitter is not supported.
- bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
- bool = true) override {
- return true;
- }
-
// Emission of machine code through MCJIT is not supported.
bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
bool = true) override {
return true;
}
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+
+ /// \brief Register NVPTX analysis passes with a pass manager.
+ void addAnalysisPasses(PassManagerBase &PM) override;
}; // NVPTXTargetMachine.
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index ba8086d78880..00ceca50a9f2 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
-#define LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETOBJECTFILE_H
#include "NVPTXSection.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -98,6 +98,9 @@ public:
return DataSection;
}
+ const MCSection *
+ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const override;
};
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
new file mode 100644
index 000000000000..b09d0d424f55
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -0,0 +1,115 @@
+//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements a TargetTransformInfo analysis pass specific to the
+// NVPTX target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXTargetMachine.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "NVPTXtti"
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeNVPTXTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class NVPTXTTI final : public ImmutablePass, public TargetTransformInfo {
+ const NVPTXTargetLowering *TLI;
+public:
+ NVPTXTTI() : ImmutablePass(ID), TLI(nullptr) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ NVPTXTTI(const NVPTXTargetMachine *TM)
+ : ImmutablePass(ID), TLI(TM->getSubtargetImpl()->getTargetLowering()) {
+ initializeNVPTXTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ void initializePass() override { pushTTIStack(this); }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ void *getAdjustedAnalysisPointer(const void *ID) override {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo *)this;
+ return this;
+ }
+
+ bool hasBranchDivergence() const override;
+
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
+ OperandValueKind Opd2Info = OK_AnyValue,
+ OperandValueProperties Opd1PropInfo = OP_None,
+ OperandValueProperties Opd2PropInfo = OP_None) const override;
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(NVPTXTTI, TargetTransformInfo, "NVPTXtti",
+ "NVPTX Target Transform Info", true, true, false)
+char NVPTXTTI::ID = 0;
+
+ImmutablePass *
+llvm::createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM) {
+ return new NVPTXTTI(TM);
+}
+
+bool NVPTXTTI::hasBranchDivergence() const { return true; }
+
+unsigned NVPTXTTI::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+
+ switch (ISD) {
+ default:
+ return TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::XOR:
+ case ISD::OR:
+ case ISD::AND:
+ // The machine code (SASS) simulates an i64 with two i32. Therefore, we
+ // estimate that arithmetic operations on i64 are twice as expensive as
+ // those on types that can fit into one machine register.
+ if (LT.second.SimpleTy == MVT::i64)
+ return 2 * LT.first;
+ // Delegate other cases to the basic TTI.
+ return TargetTransformInfo::getArithmeticInstrCost(
+ Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ }
+}
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index a9fd190b7ff0..cf1feacba3f7 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -15,16 +15,16 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MutexGuard.h"
#include <algorithm>
#include <cstring>
#include <map>
#include <string>
#include <vector>
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/Support/MutexGuard.h"
using namespace llvm;
@@ -52,7 +52,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
assert(prop && "Annotation property not a string");
// value
- ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1));
+ ConstantInt *Val = mdconst::dyn_extract<ConstantInt>(md->getOperand(i + 1));
assert(Val && "Value operand not a constant int");
std::string keyname = prop->getString().str();
@@ -75,7 +75,8 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
const MDNode *elem = NMD->getOperand(i);
- Value *entity = elem->getOperand(0);
+ GlobalValue *entity =
+ mdconst::dyn_extract_or_null<GlobalValue>(elem->getOperand(0));
// entity may be null due to DCE
if (!entity)
continue;
@@ -90,11 +91,11 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
return;
if ((*annotationCache).find(m) != (*annotationCache).end())
- (*annotationCache)[m][gv] = tmp;
+ (*annotationCache)[m][gv] = std::move(tmp);
else {
global_val_annot_t tmp1;
- tmp1[gv] = tmp;
- (*annotationCache)[m] = tmp1;
+ tmp1[gv] = std::move(tmp);
+ (*annotationCache)[m] = std::move(tmp1);
}
}
@@ -322,7 +323,7 @@ bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
if (MDNode *alignNode = I.getMetadata("callalign")) {
for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
+ mdconst::dyn_extract<ConstantInt>(alignNode->getOperand(i))) {
unsigned v = CI->getZExtValue();
if ((v >> 16) == index) {
align = v & 0xFFFF;
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index 446bfa1e112c..7e2ce73daaa3 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef NVPTXUTILITIES_H
-#define NVPTXUTILITIES_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXUTILITIES_H
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
diff --git a/lib/Target/NVPTX/NVPTXVector.td b/lib/Target/NVPTX/NVPTXVector.td
index 775df19be162..85aa34e9aea7 100644
--- a/lib/Target/NVPTX/NVPTXVector.td
+++ b/lib/Target/NVPTX/NVPTXVector.td
@@ -661,7 +661,7 @@ class ShuffleAsmStr4<string type>
string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s);
}
-let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in {
+let hasSideEffects=0, VecInstType=isVecShuffle.Value in {
def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst),
(ins V4F32Regs:$src1, V4F32Regs:$src2,
i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3),
@@ -847,7 +847,7 @@ class Vec_Move<string asmstr, NVPTXRegClass vclass, NVPTXInst sop=NOP>
!strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"),
[], sop>;
-let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1,
+let isAsCheapAsAMove=1, hasSideEffects=0, IsSimpleMove=1,
VecInstType=isVecOther.Value in {
def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>;
def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>;
diff --git a/lib/Target/NVPTX/NVPTXutil.h b/lib/Target/NVPTX/NVPTXutil.h
index d1d117159486..1915dacf0f20 100644
--- a/lib/Target/NVPTX/NVPTXutil.h
+++ b/lib/Target/NVPTX/NVPTXutil.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_NVPTX_UTIL_H
-#define LLVM_TARGET_NVPTX_UTIL_H
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"