Diffstat (limited to 'lib/Target/NVPTX')
43 files changed, 3575 insertions, 3750 deletions
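Most hunks below are mechanical reflows to the LLVM coding style, but two functional changes are easy to miss in the noise: the pre-Fermi processors (sm_10 through sm_13) are deleted from NVPTX.td, and VectorElementize.cpp gives way to NVVMReflect.cpp in the CMake build. For orientation, NVVMReflect rewrites calls to the special __nvvm_reflect(const char *) function into compile-time integer constants so device libraries can branch on build configuration at no runtime cost. A minimal sketch of that rewrite, assuming a hypothetical key/value map and written against a newer LLVM API than this tree:

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Minimal sketch of the rewrite, not the pass source: every call to the
// special __nvvm_reflect("key") function becomes an integer constant taken
// from a map (hypothetical here), and later passes fold the dead branches.
static void foldReflectCalls(Function &ReflectF, const StringMap<int> &Vals) {
  SmallVector<CallInst *, 8> Calls;
  for (User *U : ReflectF.users())   // collect first; we erase calls below
    if (CallInst *CI = dyn_cast<CallInst>(U))
      Calls.push_back(CI);

  for (CallInst *CI : Calls) {
    StringRef Key;
    if (!getConstantStringInfo(CI->getArgOperand(0), Key))
      continue; // argument was not a recognizable string literal
    Constant *V = ConstantInt::get(CI->getType(), Vals.lookup(Key));
    CI->replaceAllUsesWith(V);
    CI->eraseFromParent();
  }
}

With the map containing, say, {"__CUDA_FTZ", 1}, a guard like if (__nvvm_reflect("__CUDA_FTZ")) collapses to a constant condition and the untaken arm disappears.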
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 7cb16b4dd810..7da2fed4cd57 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -22,7 +22,7 @@ set(NVPTXCodeGen_sources NVPTXAllocaHoisting.cpp NVPTXAsmPrinter.cpp NVPTXUtilities.cpp - VectorElementize.cpp + NVVMReflect.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index 454583850b71..b3e8b5d2622d 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -52,25 +52,24 @@ enum PropertyAnnotation { }; const unsigned AnnotationNameLen = 8; // length of each annotation name -const char -PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { - "maxntidx", // PROPERTY_MAXNTID_X - "maxntidy", // PROPERTY_MAXNTID_Y - "maxntidz", // PROPERTY_MAXNTID_Z - "reqntidx", // PROPERTY_REQNTID_X - "reqntidy", // PROPERTY_REQNTID_Y - "reqntidz", // PROPERTY_REQNTID_Z - "minctasm", // PROPERTY_MINNCTAPERSM - "texture", // PROPERTY_ISTEXTURE - "surface", // PROPERTY_ISSURFACE - "sampler", // PROPERTY_ISSAMPLER - "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM - "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM - "kernel", // PROPERTY_ISKERNEL_FUNCTION - "align", // PROPERTY_ALIGN +const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { + "maxntidx", // PROPERTY_MAXNTID_X + "maxntidy", // PROPERTY_MAXNTID_Y + "maxntidz", // PROPERTY_MAXNTID_Z + "reqntidx", // PROPERTY_REQNTID_X + "reqntidy", // PROPERTY_REQNTID_Y + "reqntidz", // PROPERTY_REQNTID_Z + "minctasm", // PROPERTY_MINNCTAPERSM + "texture", // PROPERTY_ISTEXTURE + "surface", // PROPERTY_ISSURFACE + "sampler", // PROPERTY_ISSAMPLER + "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM + "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM + "kernel", // PROPERTY_ISKERNEL_FUNCTION + "align", // PROPERTY_ALIGN - // last property - "proplast", // PROPERTY_LAST + // last property + "proplast", // PROPERTY_LAST }; // name of named metadata used for global annotations @@ -80,9 +79,8 @@ PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { // compiling those .cpp files, hence __attribute__((unused)). 
__attribute__((unused)) #endif -static const char* NamedMDForAnnotations = "nvvm.annotations"; + static const char *NamedMDForAnnotations = "nvvm.annotations"; } - #endif diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 1d4166575da5..459cd96cb0cd 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -23,15 +23,15 @@ bool CompileForDebugging; // compile for debugging static cl::opt<bool, true> Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden, - cl::location(CompileForDebugging), - cl::init(false)); + cl::location(CompileForDebugging), cl::init(false)); -void NVPTXMCAsmInfo::anchor() { } +void NVPTXMCAsmInfo::anchor() {} NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::nvptx64) - PointerSize = 8; + if (TheTriple.getArch() == Triple::nvptx64) { + PointerSize = CalleeSaveStackSlotSize = 8; + } CommentString = "//"; @@ -54,7 +54,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { Data32bitsDirective = " .b32 "; Data64bitsDirective = " .b64 "; PrivateGlobalPrefix = ""; - ZeroDirective = " .b8"; + ZeroDirective = " .b8"; AsciiDirective = " .b8"; AscizDirective = " .b8"; diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 44aa01ca6e30..ccd29705df72 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -28,7 +28,6 @@ #define GET_REGINFO_MC_DESC #include "NVPTXGenRegisterInfo.inc" - using namespace llvm; static MCInstrInfo *createNVPTXMCInstrInfo() { @@ -44,22 +43,20 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { return X; } -static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { +static MCSubtargetInfo * +createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { MCSubtargetInfo *X = new MCSubtargetInfo(); InitNVPTXMCSubtargetInfo(X, TT, CPU, FS); return X; } -static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { +static MCCodeGenInfo *createNVPTXMCCodeGenInfo( + StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); X->InitMCCodeGenInfo(RM, CM, OL); return X; } - // Force static initialization. extern "C" void LLVMInitializeNVPTXTargetMC() { // Register the MC asm info. 
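One behavioral change sits in the NVPTXMCAsmInfo hunk above: nvptx64 now sets CalleeSaveStackSlotSize together with PointerSize. Generic CodeGen sizes callee-saved spill slots (and some frame-related debug info) from that MCAsmInfo field, so a 64-bit target left at the 32-bit default would get undersized slots. An illustrative consumer, not NVPTX code:

#include "llvm/MC/MCAsmInfo.h"

// Illustrative only: generic CodeGen reads this field, so it must match the
// pointer width of the target being compiled.
unsigned spillSlotBytes(const llvm::MCAsmInfo &MAI) {
  return MAI.getCalleeSaveStackSlotSize(); // 8 on nvptx64 after this change
}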
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h index b5684883fc95..d6c79b5110cc 100644 --- a/lib/Target/NVPTX/ManagedStringPool.h +++ b/lib/Target/NVPTX/ManagedStringPool.h @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_SUPPORT_MANAGED_STRING_H #define LLVM_SUPPORT_MANAGED_STRING_H diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index a8d082a4d8b0..6a53a443bfb6 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -15,11 +15,11 @@ #ifndef LLVM_TARGET_NVPTX_H #define LLVM_TARGET_NVPTX_H -#include "llvm/Value.h" -#include "llvm/Module.h" +#include "MCTargetDesc/NVPTXBaseInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" -#include "MCTargetDesc/NVPTXBaseInfo.h" #include <cassert> #include <iosfwd> @@ -41,19 +41,24 @@ enum CondCodes { inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) { switch (CC) { - case NVPTXCC::NE: return "ne"; - case NVPTXCC::EQ: return "eq"; - case NVPTXCC::LT: return "lt"; - case NVPTXCC::LE: return "le"; - case NVPTXCC::GT: return "gt"; - case NVPTXCC::GE: return "ge"; + case NVPTXCC::NE: + return "ne"; + case NVPTXCC::EQ: + return "eq"; + case NVPTXCC::LT: + return "lt"; + case NVPTXCC::LE: + return "le"; + case NVPTXCC::GT: + return "gt"; + case NVPTXCC::GE: + return "ge"; } llvm_unreachable("Unknown condition code"); } -FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM, - llvm::CodeGenOpt::Level OptLevel); -FunctionPass *createVectorElementizePass(NVPTXTargetMachine &); +FunctionPass * +createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &); @@ -63,8 +68,7 @@ bool isImageOrSamplerVal(const Value *, const Module *); extern Target TheNVPTXTarget32; extern Target TheNVPTXTarget64; -namespace NVPTX -{ +namespace NVPTX { enum DrvInterface { NVCL, CUDA, @@ -103,7 +107,7 @@ enum LoadStore { }; namespace PTXLdStInstCode { -enum AddressSpace{ +enum AddressSpace { GENERIC = 0, GLOBAL = 1, CONSTANT = 2, diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td index 7aee3595c625..d78b4e81a3e5 100644 --- a/lib/Target/NVPTX/NVPTX.td +++ b/lib/Target/NVPTX/NVPTX.td @@ -26,14 +26,6 @@ include "NVPTXInstrInfo.td" //===----------------------------------------------------------------------===// // SM Versions -def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10", - "Target SM 1.0">; -def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11", - "Target SM 1.1">; -def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12", - "Target SM 1.2">; -def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13", - "Target SM 1.3">; def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20", "Target SM 2.0">; def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21", @@ -56,10 +48,6 @@ def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31", class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; -def : Proc<"sm_10", [SM10]>; -def : Proc<"sm_11", [SM11]>; -def : Proc<"sm_12", [SM12]>; -def : Proc<"sm_13", [SM13]>; def : Proc<"sm_20", [SM20]>; def : Proc<"sm_21", [SM21]>; def : Proc<"sm_30", [SM30]>; diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp 
index 668c39308f71..0f792ec6826e 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Constants.h" #include "NVPTXAllocaHoisting.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" namespace llvm { bool NVPTXAllocaHoisting::runOnFunction(Function &function) { - bool functionModified = false; - Function::iterator I = function.begin(); - TerminatorInst *firstTerminatorInst = (I++)->getTerminator(); + bool functionModified = false; + Function::iterator I = function.begin(); + TerminatorInst *firstTerminatorInst = (I++)->getTerminator(); for (Function::iterator E = function.end(); I != E; ++I) { for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { @@ -37,12 +37,10 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) { } char NVPTXAllocaHoisting::ID = 1; -RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting", - "Hoisting alloca instructions in non-entry " - "blocks to the entry block"); +RegisterPass<NVPTXAllocaHoisting> +X("alloca-hoisting", "Hoisting alloca instructions in non-entry " + "blocks to the entry block"); -FunctionPass *createAllocaHoisting() { - return new NVPTXAllocaHoisting(); -} +FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); } } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h index c7cabf695311..19d73c5783cb 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -15,8 +15,8 @@ #define NVPTX_ALLOCA_HOISTING_H_ #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Pass.h" -#include "llvm/DataLayout.h" namespace llvm { diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 0a885ce1c4a6..ce5d78afa332 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -13,40 +13,40 @@ //===----------------------------------------------------------------------===// #include "NVPTXAsmPrinter.h" +#include "MCTargetDesc/NVPTXMCAsmInfo.h" #include "NVPTX.h" #include "NVPTXInstrInfo.h" -#include "NVPTXTargetMachine.h" +#include "NVPTXNumRegisters.h" #include "NVPTXRegisterInfo.h" +#include "NVPTXTargetMachine.h" #include "NVPTXUtilities.h" -#include "MCTargetDesc/NVPTXMCAsmInfo.h" -#include "NVPTXNumRegisters.h" +#include "cl_common_defines.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Module.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include 
"llvm/Support/FormattedStream.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Support/TimeValue.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Support/Path.h" -#include "llvm/Assembly/Writer.h" -#include "cl_common_defines.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TimeValue.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include <sstream> using namespace llvm; - #include "NVPTXGenAsmWriter.inc" bool RegAllocNilUsed = true; @@ -58,21 +58,17 @@ EmitLineNumbers("nvptx-emit-line-numbers", cl::desc("NVPTX Specific: Emit Line numbers even without -G"), cl::init(true)); -namespace llvm { -bool InterleaveSrcInPtx = false; -} - -static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src", - cl::ZeroOrMore, - cl::desc("NVPTX Specific: Emit source line in ptx file"), - cl::location(llvm::InterleaveSrcInPtx)); +namespace llvm { bool InterleaveSrcInPtx = false; } +static cl::opt<bool, true> +InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, + cl::desc("NVPTX Specific: Emit source line in ptx file"), + cl::location(llvm::InterleaveSrcInPtx)); namespace { /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V /// depends. -void DiscoverDependentGlobals(Value *V, - DenseSet<GlobalVariable*> &Globals) { +void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) Globals.insert(GV); else { @@ -87,12 +83,12 @@ void DiscoverDependentGlobals(Value *V, /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable /// instances to be emitted, but only after any dependents have been added /// first. -void VisitGlobalVariableForEmission(GlobalVariable *GV, - SmallVectorImpl<GlobalVariable*> &Order, - DenseSet<GlobalVariable*> &Visited, - DenseSet<GlobalVariable*> &Visiting) { +void VisitGlobalVariableForEmission( + GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order, + DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) { // Have we already visited this one? - if (Visited.count(GV)) return; + if (Visited.count(GV)) + return; // Do we have a circular dependency? if (Visiting.count(GV)) @@ -102,12 +98,13 @@ void VisitGlobalVariableForEmission(GlobalVariable *GV, Visiting.insert(GV); // Make sure we visit all dependents first - DenseSet<GlobalVariable*> Others; + DenseSet<GlobalVariable *> Others; for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) DiscoverDependentGlobals(GV->getOperand(i), Others); - - for (DenseSet<GlobalVariable*>::iterator I = Others.begin(), - E = Others.end(); I != E; ++I) + + for (DenseSet<GlobalVariable *>::iterator I = Others.begin(), + E = Others.end(); + I != E; ++I) VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); // Now we can visit ourself @@ -141,43 +138,35 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { if (CE == 0) llvm_unreachable("Unknown constant value to lower!"); - switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = - ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) if (C != CE) return LowerConstant(C, AP); // Otherwise report the problem to the user. 
{ - std::string S; - raw_string_ostream OS(S); - OS << "Unsupported expression in static initializer: "; - WriteAsOperand(OS, CE, /*PrintType=*/false, - !AP.MF ? 0 : AP.MF->getFunction()->getParent()); - report_fatal_error(OS.str()); + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + WriteAsOperand(OS, CE, /*PrintType=*/ false, + !AP.MF ? 0 : AP.MF->getFunction()->getParent()); + report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { const DataLayout &TD = *AP.TM.getDataLayout(); // Generate a symbolic expression for the byte address - const Constant *PtrVal = CE->getOperand(0); - SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); - int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec); + APInt OffsetAI(TD.getPointerSizeInBits(), 0); + cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); - if (Offset == 0) + if (!OffsetAI) return Base; - // Truncate/sext the offset to the pointer size. - if (TD.getPointerSizeInBits() != 64) { - int SExtAmount = 64-TD.getPointerSizeInBits(); - Offset = (Offset << SExtAmount) >> SExtAmount; - } - + int64_t Offset = OffsetAI.getSExtValue(); return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), Ctx); } @@ -187,7 +176,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // expression properly. This is important for differences between // blockaddress labels. Since the two labels are in the same function, it // is reasonable to treat their delta as a 32-bit value. - // FALL THROUGH. + // FALL THROUGH. case Instruction::BitCast: return LowerConstant(CE->getOperand(0), AP); @@ -197,7 +186,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), - false/*ZExt*/); + false /*ZExt*/); return LowerConstant(Op, AP); } @@ -219,11 +208,12 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // the high bits so we are sure to get a proper truncation if the input is // a constant expr. unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); - const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); + const MCExpr *MaskExpr = + MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx); return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); } - // The MC library also has a right-shift operator, but it isn't consistently + // The MC library also has a right-shift operator, but it isn't consistently // signed or unsigned between different targets. 
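A note on the GetElementPtr hunk above (the switch over constant-expression opcodes continues below): the old code computed the byte offset with getIndexedOffset() and then truncated and sign-extended it by hand; the new code lets GEPOperator::accumulateConstantOffset() accumulate into an APInt that is already sized to the pointer width. Condensed into a hypothetical helper:

#include <cassert>
#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

// Hypothetical helper showing the new offset logic: the APInt starts at
// pointer width, so no manual truncate/sign-extend step is needed.
static int64_t constantGEPOffset(const DataLayout &TD, const GEPOperator *GEP) {
  APInt Offset(TD.getPointerSizeInBits(), 0);
  bool AllConst = GEP->accumulateConstantOffset(TD, Offset);
  assert(AllConst && "constant-expression GEPs have all-constant indices");
  (void)AllConst; // silence -Wunused-variable in release builds
  return Offset.getSExtValue(); // already truncated to pointer width
}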
case Instruction::Add: case Instruction::Sub: @@ -237,24 +227,32 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); switch (CE->getOpcode()) { - default: llvm_unreachable("Unknown binary operator constant cast expr"); - case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); - case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); - case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); - case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); - case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); - case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); - case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); - case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); - case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); + default: + llvm_unreachable("Unknown binary operator constant cast expr"); + case Instruction::Add: + return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); + case Instruction::Sub: + return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + case Instruction::Mul: + return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); + case Instruction::SDiv: + return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); + case Instruction::SRem: + return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); + case Instruction::Shl: + return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); + case Instruction::And: + return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); + case Instruction::Or: + return MCBinaryExpr::CreateOr(LHS, RHS, Ctx); + case Instruction::Xor: + return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); } } } } - -void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) -{ +void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { if (!EmitLineNumbers) return; if (ignoreLoc(MI)) @@ -273,7 +271,6 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) if (curLoc.isUnknown()) return; - const MachineFunction *MF = MI.getParent()->getParent(); //const TargetMachine &TM = MF->getTarget(); @@ -294,14 +291,13 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) if (filenameMap.find(fileName.str()) == filenameMap.end()) return; - // Emit the line from the source file. 
if (llvm::InterleaveSrcInPtx) this->emitSrcInText(fileName.str(), curLoc.getLine()); std::stringstream temp; - temp << "\t.loc " << filenameMap[fileName.str()] - << " " << curLoc.getLine() << " " << curLoc.getCol(); + temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() + << " " << curLoc.getCol(); OutStreamer.EmitRawText(Twine(temp.str().c_str())); } @@ -314,9 +310,7 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitRawText(OS.str()); } -void NVPTXAsmPrinter::printReturnValStr(const Function *F, - raw_ostream &O) -{ +void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); const TargetLowering *TLI = TM.getTargetLowering(); @@ -334,53 +328,49 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, unsigned size = 0; if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { size = ITy->getBitWidth(); - if (size < 32) size = 32; + if (size < 32) + size = 32; } else { - assert(Ty->isFloatingPointTy() && - "Floating point type expected here"); + assert(Ty->isFloatingPointTy() && "Floating point type expected here"); size = Ty->getPrimitiveSizeInBits(); } O << ".param .b" << size << " func_retval0"; - } - else if (isa<PointerType>(Ty)) { + } else if (isa<PointerType>(Ty)) { O << ".param .b" << TLI->getPointerTy().getSizeInBits() - << " func_retval0"; + << " func_retval0"; } else { - if ((Ty->getTypeID() == Type::StructTyID) || - isa<VectorType>(Ty)) { + if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*TLI, Ty, vtparts); unsigned totalsz = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { elems = vtparts[i].getVectorNumElements(); elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) sz = 8; - totalsz += sz/8; + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + totalsz += sz / 8; } } unsigned retAlignment = 0; if (!llvm::getAlign(*F, 0, retAlignment)) retAlignment = TD->getABITypeAlignment(Ty); - O << ".param .align " - << retAlignment - << " .b8 func_retval0[" - << totalsz << "]"; + O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz + << "]"; } else - assert(false && - "Unknown return type"); + assert(false && "Unknown return type"); } } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*TLI, Ty, vtparts); unsigned idx = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -388,14 +378,16 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << ".reg .b" << sz << " func_retval" << idx; - if (j<je-1) O << ", "; + if (j < je - 1) + O << ", "; ++idx; } - if (i < e-1) + if (i < e - 1) O << ", "; } } @@ -416,7 +408,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { // Set up MRI = &MF->getRegInfo(); F = MF->getFunction(); - emitLinkageDirective(F,O); + 
emitLinkageDirective(F, O); if (llvm::isKernelFunction(*F)) O << ".entry "; else { @@ -439,7 +431,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { void NVPTXAsmPrinter::EmitFunctionBodyStart() { const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); unsigned numRegClasses = TRI.getNumRegClasses(); - VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1]; + VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1]; OutStreamer.EmitRawText(StringRef("{\n")); setAndEmitFunctionVirtualRegisters(*MF); @@ -451,54 +443,63 @@ void NVPTXAsmPrinter::EmitFunctionBodyStart() { void NVPTXAsmPrinter::EmitFunctionBodyEnd() { OutStreamer.EmitRawText(StringRef("}\n")); - delete []VRidGlobal2LocalMap; + delete[] VRidGlobal2LocalMap; } - -void -NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F, - raw_ostream &O) const { +void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, + raw_ostream &O) const { // If the NVVM IR has some of reqntid* specified, then output // the reqntid directive, and set the unspecified ones to 1. // If none of reqntid* is specified, don't output reqntid directive. unsigned reqntidx, reqntidy, reqntidz; bool specified = false; - if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1; - else specified = true; - if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1; - else specified = true; - if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1; - else specified = true; + if (llvm::getReqNTIDx(F, reqntidx) == false) + reqntidx = 1; + else + specified = true; + if (llvm::getReqNTIDy(F, reqntidy) == false) + reqntidy = 1; + else + specified = true; + if (llvm::getReqNTIDz(F, reqntidz) == false) + reqntidz = 1; + else + specified = true; if (specified) - O << ".reqntid " << reqntidx << ", " - << reqntidy << ", " << reqntidz << "\n"; + O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz + << "\n"; // If the NVVM IR has some of maxntid* specified, then output // the maxntid directive, and set the unspecified ones to 1. // If none of maxntid* is specified, don't output maxntid directive. 
unsigned maxntidx, maxntidy, maxntidz; specified = false; - if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1; - else specified = true; - if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1; - else specified = true; - if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1; - else specified = true; + if (llvm::getMaxNTIDx(F, maxntidx) == false) + maxntidx = 1; + else + specified = true; + if (llvm::getMaxNTIDy(F, maxntidy) == false) + maxntidy = 1; + else + specified = true; + if (llvm::getMaxNTIDz(F, maxntidz) == false) + maxntidz = 1; + else + specified = true; if (specified) - O << ".maxntid " << maxntidx << ", " - << maxntidy << ", " << maxntidz << "\n"; + O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz + << "\n"; unsigned mincta; if (llvm::getMinCTASm(F, mincta)) O << ".minnctapersm " << mincta << "\n"; } -void -NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, - raw_ostream &O) { - const TargetRegisterClass * RC = MRI->getRegClass(vr); +void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, + raw_ostream &O) { + const TargetRegisterClass *RC = MRI->getRegClass(vr); unsigned id = RC->getID(); std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[id]; @@ -508,61 +509,41 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, O << getNVPTXRegClassStr(RC) << mapped_vr; return; } - // Vector virtual register - if (getNVPTXVectorSize(RC) == 4) - O << "{" - << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " - << getNVPTXRegClassStr(RC) << mapped_vr << "_1, " - << getNVPTXRegClassStr(RC) << mapped_vr << "_2, " - << getNVPTXRegClassStr(RC) << mapped_vr << "_3" - << "}"; - else if (getNVPTXVectorSize(RC) == 2) - O << "{" - << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " - << getNVPTXRegClassStr(RC) << mapped_vr << "_1" - << "}"; - else - llvm_unreachable("Unsupported vector size"); + report_fatal_error("Bad register!"); } -void -NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, - raw_ostream &O) { +void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, + raw_ostream &O) { getVirtualRegisterName(vr, isVec, O); } -void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO, - const char *Modifier, - raw_ostream &O) { - static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'}; - int Imm = (int)MO.getImm(); - if(0 == strcmp(Modifier, "vecelem")) +void NVPTXAsmPrinter::printVecModifiedImmediate( + const MachineOperand &MO, const char *Modifier, raw_ostream &O) { + static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; + int Imm = (int) MO.getImm(); + if (0 == strcmp(Modifier, "vecelem")) O << "_" << vecelem[Imm]; - else if(0 == strcmp(Modifier, "vecv4comm1")) { - if((Imm < 0) || (Imm > 3)) + else if (0 == strcmp(Modifier, "vecv4comm1")) { + if ((Imm < 0) || (Imm > 3)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv4comm2")) { - if((Imm < 4) || (Imm > 7)) + } else if (0 == strcmp(Modifier, "vecv4comm2")) { + if ((Imm < 4) || (Imm > 7)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv4pos")) { - if(Imm < 0) Imm = 0; - O << "_" << vecelem[Imm%4]; - } - else if(0 == strcmp(Modifier, "vecv2comm1")) { - if((Imm < 0) || (Imm > 1)) + } else if (0 == strcmp(Modifier, "vecv4pos")) { + if (Imm < 0) + Imm = 0; + O << "_" << vecelem[Imm % 4]; + } else if (0 == strcmp(Modifier, "vecv2comm1")) { + if ((Imm < 0) || (Imm > 1)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv2comm2")) { - if((Imm < 2) || (Imm > 3)) + } else if (0 == 
strcmp(Modifier, "vecv2comm2")) { + if ((Imm < 2) || (Imm > 3)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv2pos")) { - if(Imm < 0) Imm = 0; - O << "_" << vecelem[Imm%2]; - } - else + } else if (0 == strcmp(Modifier, "vecv2pos")) { + if (Imm < 0) + Imm = 0; + O << "_" << vecelem[Imm % 2]; + } else llvm_unreachable("Unknown Modifier on immediate operand"); } @@ -584,7 +565,7 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, emitVirtualRegister(MO.getReg(), true, O); else llvm_unreachable( - "Don't know how to handle the modifier on virtual register."); + "Don't know how to handle the modifier on virtual register."); } } return; @@ -595,7 +576,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, else if (strstr(Modifier, "vec") == Modifier) printVecModifiedImmediate(MO, Modifier, O); else - llvm_unreachable("Don't know how to handle modifier on immediate operand"); + llvm_unreachable( + "Don't know how to handle modifier on immediate operand"); return; case MachineOperand::MO_FPImmediate: @@ -607,18 +589,16 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; case MachineOperand::MO_ExternalSymbol: { - const char * symbname = MO.getSymbolName(); + const char *symbname = MO.getSymbolName(); if (strstr(symbname, ".PARAM") == symbname) { unsigned index; - sscanf(symbname+6, "%u[];", &index); + sscanf(symbname + 6, "%u[];", &index); printParamName(index, O); - } - else if (strstr(symbname, ".HLPPARAM") == symbname) { + } else if (strstr(symbname, ".HLPPARAM") == symbname) { unsigned index; - sscanf(symbname+9, "%u[];", &index); + sscanf(symbname + 9, "%u[];", &index); O << *CurrentFnSym << "_param_" << index << "_offset"; - } - else + } else O << symbname; break; } @@ -632,8 +612,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, } } -void NVPTXAsmPrinter:: -printImplicitDef(const MachineInstr *MI, raw_ostream &O) const { +void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI, + raw_ostream &O) const { #ifndef __OPTIMIZE__ O << "\t// Implicit def :"; //printOperand(MI, 0); @@ -647,64 +627,69 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, if (Modifier && !strcmp(Modifier, "add")) { O << ", "; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum + 1, O); } else { - if (MI->getOperand(opNum+1).isImm() && - MI->getOperand(opNum+1).getImm() == 0) + if (MI->getOperand(opNum + 1).isImm() && + MI->getOperand(opNum + 1).getImm() == 0) return; // don't print ',0' or '+0' O << "+"; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum + 1, O); } } void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, - raw_ostream &O, const char *Modifier) -{ + raw_ostream &O, const char *Modifier) { if (Modifier) { const MachineOperand &MO = MI->getOperand(opNum); - int Imm = (int)MO.getImm(); + int Imm = (int) MO.getImm(); if (!strcmp(Modifier, "volatile")) { if (Imm) O << ".volatile"; } else if (!strcmp(Modifier, "addsp")) { switch (Imm) { - case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break; - case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break; - case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break; - case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break; - case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break; + case NVPTX::PTXLdStInstCode::GLOBAL: + O << ".global"; + break; + case NVPTX::PTXLdStInstCode::SHARED: + O << ".shared"; + break; + case NVPTX::PTXLdStInstCode::LOCAL: + O << ".local"; + break; + case 
NVPTX::PTXLdStInstCode::PARAM: + O << ".param"; + break; + case NVPTX::PTXLdStInstCode::CONSTANT: + O << ".const"; + break; case NVPTX::PTXLdStInstCode::GENERIC: if (!nvptxSubtarget.hasGenericLdSt()) O << ".global"; break; default: - assert("wrong value"); + llvm_unreachable("Wrong Address Space"); } - } - else if (!strcmp(Modifier, "sign")) { - if (Imm==NVPTX::PTXLdStInstCode::Signed) + } else if (!strcmp(Modifier, "sign")) { + if (Imm == NVPTX::PTXLdStInstCode::Signed) O << "s"; - else if (Imm==NVPTX::PTXLdStInstCode::Unsigned) + else if (Imm == NVPTX::PTXLdStInstCode::Unsigned) O << "u"; else O << "f"; - } - else if (!strcmp(Modifier, "vec")) { - if (Imm==NVPTX::PTXLdStInstCode::V2) + } else if (!strcmp(Modifier, "vec")) { + if (Imm == NVPTX::PTXLdStInstCode::V2) O << ".v2"; - else if (Imm==NVPTX::PTXLdStInstCode::V4) + else if (Imm == NVPTX::PTXLdStInstCode::V4) O << ".v4"; - } - else - assert("unknown modifier"); - } - else - assert("unknown modifier"); + } else + llvm_unreachable("Unknown Modifier"); + } else + llvm_unreachable("Empty Modifier"); } -void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) { +void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { - emitLinkageDirective(F,O); + emitLinkageDirective(F, O); if (llvm::isKernelFunction(*F)) O << ".entry "; else @@ -715,8 +700,7 @@ void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) { O << ";\n"; } -static bool usedInGlobalVarDef(const Constant *C) -{ +static bool usedInGlobalVarDef(const Constant *C) { if (!C) return false; @@ -726,8 +710,8 @@ static bool usedInGlobalVarDef(const Constant *C) return true; } - for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); - ui!=ue; ++ui) { + for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); + ui != ue; ++ui) { const Constant *C = dyn_cast<Constant>(*ui); if (usedInGlobalVarDef(C)) return true; @@ -735,8 +719,7 @@ static bool usedInGlobalVarDef(const Constant *C) return false; } -static bool usedInOneFunc(const User *U, Function const *&oneFunc) -{ +static bool usedInOneFunc(const User *U, Function const *&oneFunc) { if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { if (othergv->getName().str() == "llvm.used") return true; @@ -749,19 +732,17 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) return false; oneFunc = curFunc; return true; - } - else + } else return false; } if (const MDNode *md = dyn_cast<MDNode>(U)) if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || - (md->getName().str() == "llvm.dbg.sp"))) + (md->getName().str() == "llvm.dbg.sp"))) return true; - - for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end(); - ui!=ue; ++ui) { + for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end(); + ui != ue; ++ui) { if (usedInOneFunc(*ui, oneFunc) == false) return false; } @@ -795,16 +776,18 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { static bool useFuncSeen(const Constant *C, llvm::DenseMap<const Function *, bool> &seenMap) { - for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); - ui!=ue; ++ui) { + for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); + ui != ue; ++ui) { if (const Constant *cu = dyn_cast<Constant>(*ui)) { if (useFuncSeen(cu, seenMap)) return true; } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { const BasicBlock *bb = I->getParent(); - if (!bb) continue; + if (!bb) + continue; const Function *caller = bb->getParent(); - 
if (!caller) continue; + if (!caller) + continue; if (seenMap.find(caller) != seenMap.end()) return true; } @@ -812,10 +795,9 @@ static bool useFuncSeen(const Constant *C, return false; } -void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { +void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { llvm::DenseMap<const Function *, bool> seenMap; - for (Module::const_iterator FI=M.begin(), FE=M.end(); - FI!=FE; ++FI) { + for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { const Function *F = FI; if (F->isDeclaration()) { @@ -827,8 +809,9 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { emitDeclaration(F, O); continue; } - for (Value::const_use_iterator iter=F->use_begin(), - iterEnd=F->use_end(); iter!=iterEnd; ++iter) { + for (Value::const_use_iterator iter = F->use_begin(), + iterEnd = F->use_end(); + iter != iterEnd; ++iter) { if (const Constant *C = dyn_cast<Constant>(*iter)) { if (usedInGlobalVarDef(C)) { // The use is in the initialization of a global variable @@ -847,12 +830,15 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { } } - if (!isa<Instruction>(*iter)) continue; + if (!isa<Instruction>(*iter)) + continue; const Instruction *instr = cast<Instruction>(*iter); const BasicBlock *bb = instr->getParent(); - if (!bb) continue; + if (!bb) + continue; const Function *caller = bb->getParent(); - if (!caller) continue; + if (!caller) + continue; // If a caller has already been seen, then the caller is // appearing in the module before the callee. so print out @@ -871,9 +857,10 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { DebugInfoFinder DbgFinder; DbgFinder.processModule(M); - unsigned i=1; + unsigned i = 1; for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) { + E = DbgFinder.compile_unit_end(); + I != E; ++I) { DICompileUnit DIUnit(*I); StringRef Filename(DIUnit.getFilename()); StringRef Dirname(DIUnit.getDirectory()); @@ -890,7 +877,8 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { } for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), - E = DbgFinder.subprogram_end(); I != E; ++I) { + E = DbgFinder.subprogram_end(); + I != E; ++I) { DISubprogram SP(*I); StringRef Filename(SP.getFilename()); StringRef Dirname(SP.getDirectory()); @@ -906,7 +894,7 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { } } -bool NVPTXAsmPrinter::doInitialization (Module &M) { +bool NVPTXAsmPrinter::doInitialization(Module &M) { SmallString<128> Str1; raw_svector_ostream OS1(Str1); @@ -918,8 +906,8 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { //bool Result = AsmPrinter::doInitialization(M); // Initialize TargetLoweringObjectFile. - const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) - .Initialize(OutContext, TM); + const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) + .Initialize(OutContext, TM); Mang = new Mangler(OutContext, *TM.getDataLayout()); @@ -927,11 +915,9 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { emitHeader(M, OS1); OutStreamer.EmitRawText(OS1.str()); - // Already commented out //bool Result = AsmPrinter::doInitialization(M); - if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) recordAndEmitFilenames(M); @@ -945,16 +931,16 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { // global variable in order, and ensure that we emit it *after* its dependent // globals. 
We use a little extra memory maintaining both a set and a list to // have fast searches while maintaining a strict ordering. - SmallVector<GlobalVariable*,8> Globals; - DenseSet<GlobalVariable*> GVVisited; - DenseSet<GlobalVariable*> GVVisiting; + SmallVector<GlobalVariable *, 8> Globals; + DenseSet<GlobalVariable *> GVVisited; + DenseSet<GlobalVariable *> GVVisiting; // Visit each global variable, in order - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; + ++I) VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); - assert(GVVisited.size() == M.getGlobalList().size() && + assert(GVVisited.size() == M.getGlobalList().size() && "Missed a global variable"); assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); @@ -965,10 +951,10 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { OS2 << '\n'; OutStreamer.EmitRawText(OS2.str()); - return false; // success + return false; // success } -void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) { +void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { O << "//\n"; O << "// Generated by LLVM NVPTX Back-End\n"; O << "//\n"; @@ -1008,12 +994,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { Module::GlobalListType &global_list = M.getGlobalList(); int i, n = global_list.size(); - GlobalVariable **gv_array = new GlobalVariable* [n]; + GlobalVariable **gv_array = new GlobalVariable *[n]; // first, back-up GlobalVariable in gv_array i = 0; for (Module::global_iterator I = global_list.begin(), E = global_list.end(); - I != E; ++I) + I != E; ++I) gv_array[i++] = &*I; // second, empty global_list @@ -1024,13 +1010,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { bool ret = AsmPrinter::doFinalization(M); // now we restore global variables - for (i = 0; i < n; i ++) + for (i = 0; i < n; i++) global_list.insert(global_list.end(), gv_array[i]); delete[] gv_array; return ret; - //bool Result = AsmPrinter::doFinalization(M); // Instead of calling the parents doFinalization, we may // clone parents doFinalization and customize here. @@ -1050,8 +1035,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { // external without init -> .extern // appending -> not allowed, assert. 
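The comment block above spells out the linkage-to-directive mapping that emitLinkageDirective (next hunk) implements. Condensed into code form, illustrative only and ignoring the CUDA-versus-OpenCL driver split the real function also handles:

#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

// Illustrative condensation of the mapping documented above: a declaration
// becomes .extern, a definition .visible, and appending linkage is rejected.
static const char *ptxLinkageDirective(const GlobalValue *V) {
  if (V->hasAppendingLinkage())
    report_fatal_error("appending linkage not supported in PTX");
  if (V->hasExternalLinkage())
    return V->isDeclaration() ? ".extern " : ".visible ";
  return ""; // internal and other local linkages: no directive
}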
-void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O) -{ +void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, + raw_ostream &O) { if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { if (V->hasExternalLinkage()) { if (isa<GlobalVariable>(V)) { @@ -1078,8 +1063,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O) } } - -void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, +void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool processDemoted) { // Skip meta data @@ -1130,30 +1114,48 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, if (Initializer) CI = dyn_cast<ConstantInt>(Initializer); if (CI) { - unsigned sample=CI->getZExtValue(); + unsigned sample = CI->getZExtValue(); O << " = { "; - for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >> - __CLK_ADDRESS_BASE) ; i < 3 ; i++) { + for (int i = 0, + addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); + i < 3; i++) { O << "addr_mode_" << i << " = "; switch (addr) { - case 0: O << "wrap"; break; - case 1: O << "clamp_to_border"; break; - case 2: O << "clamp_to_edge"; break; - case 3: O << "wrap"; break; - case 4: O << "mirror"; break; + case 0: + O << "wrap"; + break; + case 1: + O << "clamp_to_border"; + break; + case 2: + O << "clamp_to_edge"; + break; + case 3: + O << "wrap"; + break; + case 4: + O << "mirror"; + break; } - O <<", "; + O << ", "; } O << "filter_mode = "; - switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) { - case 0: O << "nearest"; break; - case 1: O << "linear"; break; - case 2: assert ( 0 && "Anisotropic filtering is not supported"); - default: O << "nearest"; break; + switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { + case 0: + O << "nearest"; + break; + case 1: + O << "linear"; + break; + case 2: + assert(0 && "Anisotropic filtering is not supported"); + default: + O << "nearest"; + break; } - if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) { + if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { O << ", force_unnormalized_coords = 1"; } O << " }"; @@ -1195,7 +1197,6 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, else O << " .align " << GVar->getAlignment(); - if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { O << " ."; O << getPTXFundamentalTypeStr(ETy, false); @@ -1205,17 +1206,17 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, // Ptx allows variable initilization only for constant and global state // spaces. 
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) - && GVar->hasInitializer()) { + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && + GVar->hasInitializer()) { Constant *Initializer = GVar->getInitializer(); if (!Initializer->isNullValue()) { - O << " = " ; + O << " = "; printScalarConstant(Initializer, O); } } } else { - unsigned int ElementSize =0; + unsigned int ElementSize = 0; // Although PTX has direct support for struct type and array type and // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for @@ -1229,54 +1230,49 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, // Ptx allows variable initilization only for constant and // global state spaces. if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) - && GVar->hasInitializer()) { + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && + GVar->hasInitializer()) { Constant *Initializer = GVar->getInitializer(); - if (!isa<UndefValue>(Initializer) && - !Initializer->isNullValue()) { + if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { AggBuffer aggBuffer(ElementSize, O, *this); bufferAggregateConstant(Initializer, &aggBuffer); if (aggBuffer.numSymbols) { if (nvptxSubtarget.is64Bit()) { - O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ; - O << ElementSize/8; - } - else { - O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ; - O << ElementSize/4; + O << " .u64 " << *Mang->getSymbol(GVar) << "["; + O << ElementSize / 8; + } else { + O << " .u32 " << *Mang->getSymbol(GVar) << "["; + O << ElementSize / 4; } O << "]"; - } - else { - O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; + } else { + O << " .b8 " << *Mang->getSymbol(GVar) << "["; O << ElementSize; O << "]"; } - O << " = {" ; + O << " = {"; aggBuffer.print(); O << "}"; - } - else { - O << " .b8 " << *Mang->getSymbol(GVar) ; + } else { + O << " .b8 " << *Mang->getSymbol(GVar); if (ElementSize) { - O <<"[" ; + O << "["; O << ElementSize; O << "]"; } } - } - else { + } else { O << " .b8 " << *Mang->getSymbol(GVar); if (ElementSize) { - O <<"[" ; + O << "["; O << ElementSize; O << "]"; } } break; default: - assert( 0 && "type not supported yet"); + assert(0 && "type not supported yet"); } } @@ -1289,7 +1285,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { std::vector<GlobalVariable *> &gvars = localDecls[f]; - for (unsigned i=0, e=gvars.size(); i!=e; ++i) { + for (unsigned i = 0, e = gvars.size(); i != e; ++i) { O << "\t// demoted variable\n\t"; printModuleLevelGV(gvars[i], O, true); } @@ -1299,32 +1295,33 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const { switch (AddressSpace) { case llvm::ADDRESS_SPACE_LOCAL: - O << "local" ; + O << "local"; break; case llvm::ADDRESS_SPACE_GLOBAL: - O << "global" ; + O << "global"; break; case llvm::ADDRESS_SPACE_CONST: // This logic should be consistent with that in // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp) if (nvptxSubtarget.hasGenericLdSt()) - O << "global" ; + O << "global"; else - O << "const" ; + O << "const"; break; case llvm::ADDRESS_SPACE_CONST_NOT_GEN: - O << "const" ; + O << "const"; break; 
case llvm::ADDRESS_SPACE_SHARED: - O << "shared" ; + O << "shared"; break; default: - llvm_unreachable("unexpected address space"); + report_fatal_error("Bad address space found while emitting PTX"); + break; } } -std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, - bool useB4PTR) const { +std::string +NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { switch (Ty->getTypeID()) { default: llvm_unreachable("unexpected type"); @@ -1348,17 +1345,20 @@ std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, return "f64"; case Type::PointerTyID: if (nvptxSubtarget.is64Bit()) - if (useB4PTR) return "b64"; - else return "u64"; + if (useB4PTR) + return "b64"; + else + return "u64"; + else if (useB4PTR) + return "b32"; else - if (useB4PTR) return "b32"; - else return "u32"; + return "u32"; } llvm_unreachable("unexpected type"); return NULL; } -void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, +void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); @@ -1382,7 +1382,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, return; } - int64_t ElementSize =0; + int64_t ElementSize = 0; // Although PTX has direct support for struct type and array type and LLVM IR // is very similar to PTX, the LLVM CodeGen does not support for targets that @@ -1393,22 +1393,19 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, case Type::ArrayTyID: case Type::VectorTyID: ElementSize = TD->getTypeStoreSize(ETy); - O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; + O << " .b8 " << *Mang->getSymbol(GVar) << "["; if (ElementSize) { - O << itostr(ElementSize) ; + O << itostr(ElementSize); } O << "]"; break; default: - assert( 0 && "type not supported yet"); + assert(0 && "type not supported yet"); } - return ; + return; } - -static unsigned int -getOpenCLAlignment(const DataLayout *TD, - Type *Ty) { +static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) return TD->getPrefTypeAlignment(Ty); @@ -1422,9 +1419,9 @@ getOpenCLAlignment(const DataLayout *TD, unsigned int numE = VTy->getNumElements(); unsigned int alignE = TD->getPrefTypeAlignment(ETy); if (numE == 3) - return 4*alignE; + return 4 * alignE; else - return numE*alignE; + return numE * alignE; } const StructType *STy = dyn_cast<StructType>(Ty); @@ -1432,7 +1429,7 @@ getOpenCLAlignment(const DataLayout *TD, unsigned int alignStruct = 1; // Go through each element of the struct and find the // largest alignment. 
- for (unsigned i=0, e=STy->getNumElements(); i != e; i++) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { Type *ETy = STy->getElementType(i); unsigned int align = getOpenCLAlignment(TD, ETy); if (align > alignStruct) @@ -1476,7 +1473,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { } for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { - if (i==paramIndex) { + if (i == paramIndex) { printParamName(I, paramIndex, O); return; } @@ -1484,10 +1481,9 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { llvm_unreachable("paramIndex out of bound"); } -void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, - raw_ostream &O) { +void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); - const AttrListPtr &PAL = F->getAttributes(); + const AttributeSet &PAL = F->getAttributes(); const TargetLowering *TLI = TM.getTargetLowering(); Function::const_arg_iterator I, E; unsigned paramIndex = 0; @@ -1499,7 +1495,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, O << "(\n"; for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { - const Type *Ty = I->getType(); + Type *Ty = I->getType(); if (!first) O << ",\n"; @@ -1514,15 +1510,28 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex; else // Default image is read_only O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex; - } - else // Should be llvm::isSampler(*I) + } else // Should be llvm::isSampler(*I) O << "\t.param .samplerref " << *CurrentFnSym << "_param_" - << paramIndex; + << paramIndex; continue; } - if (PAL.getParamAttributes(paramIndex+1). - hasAttribute(Attributes::ByVal) == false) { + if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { + if (Ty->isVectorTy()) { + // Just print .param .b8 .align <a> .param[size]; + // <a> = PAL.getparamalignment + // size = typeallocsize of element type + unsigned align = PAL.getParamAlignment(paramIndex + 1); + if (align == 0) + align = TD->getABITypeAlignment(Ty); + + unsigned sz = TD->getTypeAllocSize(Ty); + O << "\t.param .align " << align << " .b8 "; + printParamName(I, paramIndex, O); + O << "[" << sz << "]"; + + continue; + } // Just a scalar const PointerType *PTy = dyn_cast<PointerType>(Ty); if (isKernelFunc) { @@ -1533,7 +1542,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { Type *ETy = PTy->getElementType(); int addrSpace = PTy->getAddressSpace(); - switch(addrSpace) { + switch (addrSpace) { default: O << ".ptr "; break; @@ -1548,15 +1557,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, O << ".ptr .global "; break; } - O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " "; + O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; } printParamName(I, paramIndex, O); continue; } // non-pointer scalar to kernel func - O << "\t.param ." - << getPTXFundamentalTypeStr(Ty) << " "; + O << "\t.param ." 
<< getPTXFundamentalTypeStr(Ty) << " "; printParamName(I, paramIndex, O); continue; } @@ -1565,9 +1573,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); - if (sz < 32) sz = 32; - } - else if (isa<PointerType>(Ty)) + if (sz < 32) + sz = 32; + } else if (isa<PointerType>(Ty)) sz = thePointerTy.getSizeInBits(); else sz = Ty->getPrimitiveSizeInBits(); @@ -1581,21 +1589,19 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, // param has byVal attribute. So should be a pointer const PointerType *PTy = dyn_cast<PointerType>(Ty); - assert(PTy && - "Param with byval attribute should be a pointer type"); + assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); if (isABI || isKernelFunc) { // Just print .param .b8 .align <a> .param[size]; // <a> = PAL.getparamalignment // size = typeallocsize of element type - unsigned align = PAL.getParamAlignment(paramIndex+1); + unsigned align = PAL.getParamAlignment(paramIndex + 1); if (align == 0) align = TD->getABITypeAlignment(ETy); unsigned sz = TD->getTypeAllocSize(ETy); - O << "\t.param .align " << align - << " .b8 "; + O << "\t.param .align " << align << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; continue; @@ -1606,7 +1612,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, // each vector element. SmallVector<EVT, 16> vtparts; ComputeValueVTs(*TLI, ETy, vtparts); - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -1614,15 +1620,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0,je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << "\t.reg .b" << sz << " "; printParamName(I, paramIndex, O); - if (j<je-1) O << ",\n"; + if (j < je - 1) + O << ",\n"; ++paramIndex; } - if (i<e-1) + if (i < e - 1) O << ",\n"; } --paramIndex; @@ -1639,9 +1647,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, emitFunctionParamList(F, O); } - -void NVPTXAsmPrinter:: -setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { +void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( + const MachineFunction &MF) { SmallString<128> Str; raw_svector_ostream O(Str); @@ -1654,14 +1661,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); int NumBytes = (int) MFI->getStackSize(); if (NumBytes) { - O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" - << DEPOTNAME - << getFunctionNumber() << "[" << NumBytes << "];\n"; + O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME + << getFunctionNumber() << "[" << NumBytes << "];\n"; if (nvptxSubtarget.is64Bit()) { O << "\t.reg .b64 \t%SP;\n"; O << "\t.reg .b64 \t%SPL;\n"; - } - else { + } else { O << "\t.reg .b32 \t%SP;\n"; O << "\t.reg .b32 \t%SPL;\n"; } @@ -1672,12 +1677,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { // register number and the per class virtual register number. // We use the per class virtual register number in the ptx output. 
unsigned int numVRs = MRI->getNumVirtRegs(); - for (unsigned i=0; i< numVRs; i++) { + for (unsigned i = 0; i < numVRs; i++) { unsigned int vr = TRI->index2VirtReg(i); const TargetRegisterClass *RC = MRI->getRegClass(vr); std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()]; int n = regmap.size(); - regmap.insert(std::make_pair(vr, n+1)); + regmap.insert(std::make_pair(vr, n + 1)); } // Emit register declarations @@ -1721,23 +1726,20 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { OutStreamer.EmitRawText(O.str()); } - void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { - APFloat APF = APFloat(Fp->getValueAPF()); // make a copy + APFloat APF = APFloat(Fp->getValueAPF()); // make a copy bool ignored; unsigned int numHex; const char *lead; - if (Fp->getType()->getTypeID()==Type::FloatTyID) { + if (Fp->getType()->getTypeID() == Type::FloatTyID) { numHex = 8; lead = "0f"; - APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, - &ignored); + APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { numHex = 16; lead = "0d"; - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, - &ignored); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); } else llvm_unreachable("unsupported fp type"); @@ -1779,7 +1781,6 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { llvm_unreachable("Not scalar type found in printScalarConstant()"); } - void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) { @@ -1787,7 +1788,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, if (isa<UndefValue>(CPV) || CPV->isNullValue()) { int s = TD->getTypeAllocSize(CPV->getType()); - if (s<Bytes) + if (s < Bytes) s = Bytes; aggBuffer->addZeros(s); return; @@ -1798,28 +1799,26 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, case Type::IntegerTyID: { const Type *ETy = CPV->getType(); - if ( ETy == Type::getInt8Ty(CPV->getContext()) ){ + if (ETy == Type::getInt8Ty(CPV->getContext())) { unsigned char c = (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); ptr = &c; aggBuffer->addBytes(ptr, 1, Bytes); - } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) { - short int16 = - (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); - ptr = (unsigned char*)&int16; + } else if (ETy == Type::getInt16Ty(CPV->getContext())) { + short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); + ptr = (unsigned char *)&int16; aggBuffer->addBytes(ptr, 2, Bytes); - } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) { + } else if (ETy == Type::getInt32Ty(CPV->getContext())) { if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { - int int32 =(int)(constInt->getZExtValue()); - ptr = (unsigned char*)&int32; + int int32 = (int)(constInt->getZExtValue()); + ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); break; } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - if (ConstantInt *constInt = - dyn_cast<ConstantInt>(ConstantFoldConstantExpression( - Cexpr, TD))) { - int int32 =(int)(constInt->getZExtValue()); - ptr = (unsigned char*)&int32; + if (ConstantInt *constInt = dyn_cast<ConstantInt>( + ConstantFoldConstantExpression(Cexpr, TD))) { + int int32 = (int)(constInt->getZExtValue()); + ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); break; } @@ -1831,17 +1830,17 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int
Bytes, } } llvm_unreachable("unsupported integer const type"); - } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) { + } else if (ETy == Type::getInt64Ty(CPV->getContext())) { if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { - long long int64 =(long long)(constInt->getZExtValue()); - ptr = (unsigned char*)&int64; + long long int64 = (long long)(constInt->getZExtValue()); + ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); break; } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (ConstantInt *constInt = dyn_cast<ConstantInt>( - ConstantFoldConstantExpression(Cexpr, TD))) { - long long int64 =(long long)(constInt->getZExtValue()); - ptr = (unsigned char*)&int64; + ConstantFoldConstantExpression(Cexpr, TD))) { + long long int64 = (long long)(constInt->getZExtValue()); + ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); break; } @@ -1860,17 +1859,16 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, case Type::FloatTyID: case Type::DoubleTyID: { ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); - const Type* Ty = CFP->getType(); + const Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { - float float32 = (float)CFP->getValueAPF().convertToFloat(); - ptr = (unsigned char*)&float32; + float float32 = (float) CFP->getValueAPF().convertToFloat(); + ptr = (unsigned char *)&float32; aggBuffer->addBytes(ptr, 4, Bytes); } else if (Ty == Type::getDoubleTy(CPV->getContext())) { double float64 = CFP->getValueAPF().convertToDouble(); - ptr = (unsigned char*)&float64; + ptr = (unsigned char *)&float64; aggBuffer->addBytes(ptr, 8, Bytes); - } - else { + } else { llvm_unreachable("unsupported fp const type"); } break; @@ -1878,8 +1876,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, case Type::PointerTyID: { if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { aggBuffer->addSymbol(GVar); - } - else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { Value *v = Cexpr->stripPointerCasts(); aggBuffer->addSymbol(v); } @@ -1895,10 +1892,9 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, isa<ConstantStruct>(CPV)) { int ElementSize = TD->getTypeAllocSize(CPV->getType()); bufferAggregateConstant(CPV, aggBuffer); - if ( Bytes > ElementSize ) - aggBuffer->addZeros(Bytes-ElementSize); - } - else if (isa<ConstantAggregateZero>(CPV)) + if (Bytes > ElementSize) + aggBuffer->addZeros(Bytes - ElementSize); + } else if (isa<ConstantAggregateZero>(CPV)) aggBuffer->addZeros(Bytes); else llvm_unreachable("Unexpected Constant type"); @@ -1924,7 +1920,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, } if (const ConstantDataSequential *CDS = - dyn_cast<ConstantDataSequential>(CPV)) { + dyn_cast<ConstantDataSequential>(CPV)) { if (CDS->getNumElements()) for (unsigned i = 0; i < CDS->getNumElements(); ++i) bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, @@ -1932,20 +1928,18 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, return; } - if (isa<ConstantStruct>(CPV)) { if (CPV->getNumOperands()) { StructType *ST = cast<StructType>(CPV->getType()); for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { - if ( i == (e - 1)) + if (i == (e - 1)) Bytes = TD->getStructLayout(ST)->getElementOffset(0) + - TD->getTypeAllocSize(ST) - - TD->getStructLayout(ST)->getElementOffset(i); + TD->getTypeAllocSize(ST) - + TD->getStructLayout(ST)->getElementOffset(i); else - Bytes = 
TD->getStructLayout(ST)->getElementOffset(i+1) - - TD->getStructLayout(ST)->getElementOffset(i); - bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, - aggBuffer); + Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - + TD->getStructLayout(ST)->getElementOffset(i); + bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); } } return; @@ -1956,15 +1950,13 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, // buildTypeNameMap - Run through symbol table looking for type names. // - bool NVPTXAsmPrinter::isImageType(const Type *Ty) { std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); - if (PI != TypeNameMap.end() && - (!PI->second.compare("struct._image1d_t") || - !PI->second.compare("struct._image2d_t") || - !PI->second.compare("struct._image3d_t"))) + if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || + !PI->second.compare("struct._image2d_t") || + !PI->second.compare("struct._image3d_t"))) return true; return false; @@ -1974,10 +1966,10 @@ bool NVPTXAsmPrinter::isImageType(const Type *Ty) { /// bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { + const char *ExtraCode, raw_ostream &O) { if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) return true; // Unknown modifier. + if (ExtraCode[1] != 0) + return true; // Unknown modifier. switch (ExtraCode[0]) { default: @@ -1993,13 +1985,11 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } -bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { +bool NVPTXAsmPrinter::PrintAsmMemoryOperand( + const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier + return true; // Unknown modifier O << '['; printMemOperand(MI, OpNo, O); @@ -2008,71 +1998,69 @@ bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) -{ - switch(MI.getOpcode()) { +bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: return false; - case NVPTX::CallArgBeginInst: case NVPTX::CallArgEndInst0: - case NVPTX::CallArgEndInst1: case NVPTX::CallArgF32: - case NVPTX::CallArgF64: case NVPTX::CallArgI16: - case NVPTX::CallArgI32: case NVPTX::CallArgI32imm: - case NVPTX::CallArgI64: case NVPTX::CallArgI8: - case NVPTX::CallArgParam: case NVPTX::CallVoidInst: - case NVPTX::CallVoidInstReg: case NVPTX::Callseq_End: + case NVPTX::CallArgBeginInst: + case NVPTX::CallArgEndInst0: + case NVPTX::CallArgEndInst1: + case NVPTX::CallArgF32: + case NVPTX::CallArgF64: + case NVPTX::CallArgI16: + case NVPTX::CallArgI32: + case NVPTX::CallArgI32imm: + case NVPTX::CallArgI64: + case NVPTX::CallArgI8: + case NVPTX::CallArgParam: + case NVPTX::CallVoidInst: + case NVPTX::CallVoidInstReg: + case NVPTX::Callseq_End: case NVPTX::CallVoidInstReg64: - case NVPTX::DeclareParamInst: case NVPTX::DeclareRetMemInst: - case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst: - case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst: - case NVPTX::StoreParamF32: case NVPTX::StoreParamF64: - case NVPTX::StoreParamI16: case NVPTX::StoreParamI32: - case NVPTX::StoreParamI64: case NVPTX::StoreParamI8: - case NVPTX::StoreParamS32I8: case 
NVPTX::StoreParamU32I8: - case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16: - case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64: - case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32: - case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8: - case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16: - case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8: - case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64: - case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32: - case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8: - case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16: - case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8: - case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64: - case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32: - case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8: - case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64: - case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32: - case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8: - case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16: - case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8: - case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64: - case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32: - case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8: - case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16: - case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8: - case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64: - case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32: - case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64: - case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam: - case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64: - case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32: - case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8: - case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64: - case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32: - case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8: - case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64: - case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32: - case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8: - case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16: - case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8: - case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64: - case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32: - case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8: - case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16: - case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8: - case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE: + case NVPTX::DeclareParamInst: + case NVPTX::DeclareRetMemInst: + case NVPTX::DeclareRetRegInst: + case NVPTX::DeclareRetScalarInst: + case NVPTX::DeclareScalarParamInst: + case NVPTX::DeclareScalarRegInst: + case NVPTX::StoreParamF32: + case NVPTX::StoreParamF64: + case NVPTX::StoreParamI16: + case NVPTX::StoreParamI32: + case NVPTX::StoreParamI64: + case NVPTX::StoreParamI8: + case NVPTX::StoreParamS32I8: + case NVPTX::StoreParamU32I8: + case NVPTX::StoreParamS32I16: + case NVPTX::StoreParamU32I16: + case NVPTX::StoreRetvalF32: + case NVPTX::StoreRetvalF64: + case NVPTX::StoreRetvalI16: + case NVPTX::StoreRetvalI32: + case NVPTX::StoreRetvalI64: + case NVPTX::StoreRetvalI8: + case 
NVPTX::LastCallArgF32: + case NVPTX::LastCallArgF64: + case NVPTX::LastCallArgI16: + case NVPTX::LastCallArgI32: + case NVPTX::LastCallArgI32imm: + case NVPTX::LastCallArgI64: + case NVPTX::LastCallArgI8: + case NVPTX::LastCallArgParam: + case NVPTX::LoadParamMemF32: + case NVPTX::LoadParamMemF64: + case NVPTX::LoadParamMemI16: + case NVPTX::LoadParamMemI32: + case NVPTX::LoadParamMemI64: + case NVPTX::LoadParamMemI8: + case NVPTX::LoadParamRegF32: + case NVPTX::LoadParamRegF64: + case NVPTX::LoadParamRegI16: + case NVPTX::LoadParamRegI32: + case NVPTX::LoadParamRegI64: + case NVPTX::LoadParamRegI8: + case NVPTX::PrototypeInst: + case NVPTX::DBG_VALUE: return true; } return false; @@ -2084,10 +2072,9 @@ extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); } - void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { std::stringstream temp; - LineReader * reader = this->getReader(filename.str()); + LineReader *reader = this->getReader(filename.str()); temp << "\n//"; temp << filename.str(); temp << ":"; @@ -2098,29 +2085,26 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { this->OutStreamer.EmitRawText(Twine(temp.str())); } - LineReader *NVPTXAsmPrinter::getReader(std::string filename) { - if (reader == NULL) { - reader = new LineReader(filename); + if (reader == NULL) { + reader = new LineReader(filename); } if (reader->fileName() != filename) { delete reader; - reader = new LineReader(filename); + reader = new LineReader(filename); } return reader; } - -std::string -LineReader::readLine(unsigned lineNum) { +std::string LineReader::readLine(unsigned lineNum) { if (lineNum < theCurLine) { theCurLine = 0; - fstr.seekg(0,std::ios::beg); + fstr.seekg(0, std::ios::beg); } while (theCurLine < lineNum) { - fstr.getline(buff,500); + fstr.getline(buff, 500); theCurLine++; } return buff; diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 6488b1442580..6dc9fc0ffeff 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -16,19 +16,19 @@ #define NVPTXASMPRINTER_H #include "NVPTX.h" -#include "NVPTXTargetMachine.h" #include "NVPTXSubtarget.h" -#include "llvm/Function.h" +#include "NVPTXTargetMachine.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/Mangler.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/Target/TargetMachine.h" #include <fstream> // The ptx syntax and format is very different from that usually seem in a .s @@ -43,15 +43,15 @@ // This is defined in AsmPrinter.cpp. // Used to process the constant expressions in initializers. 
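The LineReader::readLine implementation in the hunk above keeps theCurLine as a cursor and only rewinds the stream when an earlier line is requested, so sequential queries stay cheap. Below is a self-contained re-creation of that caching reader, a sketch rather than the real class: it uses std::getline instead of the fixed 500-byte buffer and adds a defensive fstr.clear() before rewinding, and the file name in main is a hypothetical example.

    #include <fstream>
    #include <iostream>
    #include <string>

    // A minimal line reader in the spirit of LineReader: remembers how far
    // it has read and only rewinds when asked for an earlier line.
    class SimpleLineReader {
      std::ifstream fstr;
      unsigned theCurLine = 0;
      std::string buff;
    public:
      explicit SimpleLineReader(const std::string &filename) : fstr(filename) {}
      std::string readLine(unsigned lineNum) {
        if (lineNum < theCurLine) {   // going backwards: restart from the top
          theCurLine = 0;
          fstr.clear();               // reset a possible EOF state first
          fstr.seekg(0, std::ios::beg);
        }
        while (theCurLine < lineNum && std::getline(fstr, buff))
          theCurLine++;
        return buff;                  // lineNum is 1-based, as in the pass
      }
    };

    int main() {
      SimpleLineReader reader("example.txt");  // hypothetical input file
      std::cout << reader.readLine(3) << "\n"; // third line of the file
      std::cout << reader.readLine(1) << "\n"; // forces a rewind
      return 0;
    }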
namespace nvptx { -const llvm::MCExpr *LowerConstant(const llvm::Constant *CV, - llvm::AsmPrinter &AP) ; +const llvm::MCExpr * +LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP); } namespace llvm { class LineReader { private: - unsigned theCurLine ; + unsigned theCurLine; std::ifstream fstr; char buff[512]; std::string theFileName; @@ -63,17 +63,12 @@ public: theFileName = filename; } std::string fileName() { return theFileName; } - ~LineReader() { - fstr.close(); - } + ~LineReader() { fstr.close(); } std::string readLine(unsigned line); }; - - class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { - class AggBuffer { // Used to buffer the emitted string for initializing global // aggregates. @@ -92,7 +87,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { // Once we have this AggBuffer setup, we can choose how to print // it out. public: - unsigned size; // size of the buffer in bytes + unsigned size; // size of the buffer in bytes unsigned char *buffer; // the buffer unsigned numSymbols; // number of symbol addresses SmallVector<unsigned, 4> symbolPosInBuffer; @@ -105,33 +100,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { public: AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP) - :O(_O),AP(_AP) { + : O(_O), AP(_AP) { buffer = new unsigned char[_size]; size = _size; curpos = 0; numSymbols = 0; } - ~AggBuffer() { - delete [] buffer; - } + ~AggBuffer() { delete[] buffer; } unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) { - assert((curpos+Num) <= size); - assert((curpos+Bytes) <= size); - for ( int i= 0; i < Num; ++i) { + assert((curpos + Num) <= size); + assert((curpos + Bytes) <= size); + for (int i = 0; i < Num; ++i) { buffer[curpos] = Ptr[i]; - curpos ++; + curpos++; } - for ( int i=Num; i < Bytes ; ++i) { + for (int i = Num; i < Bytes; ++i) { buffer[curpos] = 0; - curpos ++; + curpos++; } return curpos; } unsigned addZeros(int Num) { - assert((curpos+Num) <= size); - for ( int i= 0; i < Num; ++i) { + assert((curpos + Num) <= size); + for (int i = 0; i < Num; ++i) { buffer[curpos] = 0; - curpos ++; + curpos++; } return curpos; } @@ -143,10 +136,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { void print() { if (numSymbols == 0) { // print out in bytes - for (unsigned i=0; i<size; i++) { + for (unsigned i = 0; i < size; i++) { if (i) O << ", "; - O << (unsigned int)buffer[i]; + O << (unsigned int) buffer[i]; } } else { // print out in 4-bytes or 8-bytes @@ -156,7 +149,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { unsigned int nBytes = 4; if (AP.nvptxSubtarget.is64Bit()) nBytes = 8; - for (pos=0; pos<size; pos+=nBytes) { + for (pos = 0; pos < size; pos += nBytes) { if (pos) O << ", "; if (pos == nextSymbolPos) { @@ -164,22 +157,19 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { MCSymbol *Name = AP.Mang->getSymbol(GVar); O << *Name; - } - else if (ConstantExpr *Cexpr = - dyn_cast<ConstantExpr>(v)) { + } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) { O << *nvptx::LowerConstant(Cexpr, AP); } else llvm_unreachable("symbol type unknown"); nSym++; if (nSym >= numSymbols) - nextSymbolPos = size+1; + nextSymbolPos = size + 1; else nextSymbolPos = symbolPosInBuffer[nSym]; - } else - if (nBytes == 4) - O << *(unsigned int*)(buffer+pos); - else - O << *(unsigned long long*)(buffer+pos); + } else if (nBytes == 4) + O << *(unsigned int *)(buffer + pos); + else + O << 
*(unsigned long long *)(buffer + pos); } } } @@ -189,10 +179,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { virtual void emitSrcInText(StringRef filename, unsigned line); -private : - virtual const char *getPassName() const { - return "NVPTX Assembly Printer"; - } +private: + virtual const char *getPassName() const { return "NVPTX Assembly Printer"; } const Function *F; std::string CurrentFnName; @@ -207,31 +195,28 @@ private : void printGlobalVariable(const GlobalVariable *GVar); void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier=0); + const char *Modifier = 0); void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier=0); - void printVecModifiedImmediate(const MachineOperand &MO, - const char *Modifier, raw_ostream &O); + const char *Modifier = 0); + void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier, + raw_ostream &O); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier=0); + const char *Modifier = 0); void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; // definition autogenerated. void printInstruction(const MachineInstr *MI, raw_ostream &O); - void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, - bool=false); + void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false); void printParamName(int paramIndex, raw_ostream &O); void printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O); void emitHeader(Module &M, raw_ostream &O); - void emitKernelFunctionDirectives(const Function& F, - raw_ostream &O) const; + void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const; void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O); void emitFunctionExternParamList(const MachineFunction &MF); void emitFunctionParamList(const Function *, raw_ostream &O); void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O); void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF); - void emitFunctionTempData(const MachineFunction &MF, - unsigned &FrameSize); + void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize); bool isImageType(const Type *Ty); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, @@ -269,17 +254,16 @@ private: void recordAndEmitFilenames(Module &); void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O); - void emitPTXAddressSpace(unsigned int AddressSpace, - raw_ostream &O) const; - std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ; - void printScalarConstant(Constant *CPV, raw_ostream &O) ; - void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ; - void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ; - void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ; + void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const; + std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const; + void printScalarConstant(Constant *CPV, raw_ostream &O); + void printFPConstant(const ConstantFP *Fp, raw_ostream &O); + void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer); + void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer); void printOperandProper(const MachineOperand &MO); - void emitLinkageDirective(const GlobalValue* V, raw_ostream &O); + void emitLinkageDirective(const GlobalValue *V, raw_ostream &O); void 
emitDeclarations(Module &, raw_ostream &O); void emitDeclaration(const Function *, raw_ostream &O); @@ -289,10 +273,9 @@ private: LineReader *reader; LineReader *getReader(std::string); public: - NVPTXAsmPrinter(TargetMachine &TM, - MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), - nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { + NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer), + nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { CurrentBankselLabelInBasicBlock = ""; VRidGlobal2LocalMap = NULL; reader = NULL; diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index a9abc00bf3f6..6533da5102b0 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -17,17 +17,15 @@ #include "NVPTXSubtarget.h" #include "NVPTXTargetMachine.h" #include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; -bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { - return true; -} +bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; } void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { if (MF.getFrameInfo()->hasStackObjects()) { @@ -42,35 +40,39 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { // mov %SPL, %depot; // cvta.local %SP, %SPL; if (is64bit) { - MachineInstr *MI = BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), - NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); - BuildMI(MBB, MI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal) - .addReg(NVPTX::VRDepot); + MachineInstr *MI = BuildMI( + MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), + NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); + BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr), + NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot); } else { - MachineInstr *MI = BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::cvta_local_yes), - NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); - BuildMI(MBB, MI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal) - .addReg(NVPTX::VRDepot); + MachineInstr *MI = BuildMI( + MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes), + NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); + BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr), + NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot); } - } - else { + } else { // mov %SP, %depot; if (is64bit) - BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame) - .addReg(NVPTX::VRDepot); + BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr), + NVPTX::VRFrame).addReg(NVPTX::VRDepot); else - BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame) - .addReg(NVPTX::VRDepot); + BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr), + NVPTX::VRFrame).addReg(NVPTX::VRDepot); } } } void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { + MachineBasicBlock &MBB) const {} + +// This function eliminates ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void NVPTXFrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // 
Simply discard ADJCALLSTACKDOWN, + // ADJCALLSTACKUP instructions. + MBB.erase(I); } diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index ee87b3997e78..819f1dd3f4be 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -16,7 +16,6 @@ #include "llvm/Target/TargetFrameLowering.h" - namespace llvm { class NVPTXTargetMachine; @@ -26,13 +25,16 @@ class NVPTXFrameLowering : public TargetFrameLowering { public: explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit) - : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), - tm(_tm), is64bit(_is64bit) {} + : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm), + is64bit(_is64bit) {} virtual bool hasFP(const MachineFunction &MF) const; virtual void emitPrologue(MachineFunction &MF) const; - virtual void emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const; + virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; }; } // End llvm namespace diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 4e92f0e785fe..e862988c85d1 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -11,42 +11,36 @@ // //===----------------------------------------------------------------------===// - -#include "llvm/Instructions.h" -#include "llvm/Support/raw_ostream.h" #include "NVPTXISelDAGToDAG.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/GlobalValue.h" #undef DEBUG_TYPE #define DEBUG_TYPE "nvptx-isel" using namespace llvm; - -static cl::opt<bool> -UseFMADInstruction("nvptx-mad-enable", - cl::ZeroOrMore, - cl::desc("NVPTX Specific: Enable generating FMAD instructions"), - cl::init(false)); +static cl::opt<bool> UseFMADInstruction( + "nvptx-mad-enable", cl::ZeroOrMore, + cl::desc("NVPTX Specific: Enable generating FMAD instructions"), + cl::init(false)); static cl::opt<int> -FMAContractLevel("nvptx-fma-level", - cl::ZeroOrMore, +FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" - " 1: do it 2: do it aggressively"), - cl::init(2)); + " 1: do it 2: do it aggressively"), + cl::init(2)); - -static cl::opt<int> -UsePrecDivF32("nvptx-prec-divf32", - cl::ZeroOrMore, - cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" - " IEEE Compliant F32 div.rnd if avaiable."), - cl::init(2)); +static cl::opt<int> UsePrecDivF32( + "nvptx-prec-divf32", cl::ZeroOrMore, + cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" + " IEEE Compliant F32 div.rnd if avaiable."), + cl::init(2)); /// createNVPTXISelDag - This pass converts a legalized DAG into a /// NVPTX-specific DAG, ready for instruction scheduling. 
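The three command-line options above feed the constructor in the next hunk, which turns the option values into per-pass booleans: level 0 disables contraction, level 1 allows it, and level 2 enables the aggressive variants. A minimal sketch of how the f32 flags are derived (the f64 flags follow the same pattern); the struct and function names are illustrative, not LLVM API.

    #include <cstdio>

    // Mirrors the flag computation in NVPTXDAGToDAGISel's constructor:
    // contraction needs -O1 or above, fma requires hardware support, and
    // mad.f32 is only a fallback when fma.f32 is unavailable.
    struct FMAFlags {
      bool doFMAF32, doFMAF32AGG, doFMADF32;
    };

    FMAFlags computeFlags(int optLevel, int fmaContractLevel, bool useFMAD,
                          bool hasFMAF32) {
      FMAFlags f;
      f.doFMADF32 = (optLevel > 0) && useFMAD && !hasFMAF32;
      f.doFMAF32 = (optLevel > 0) && hasFMAF32 && (fmaContractLevel >= 1);
      f.doFMAF32AGG = (optLevel > 0) && hasFMAF32 && (fmaContractLevel == 2);
      return f;
    }

    int main() {
      FMAFlags f = computeFlags(/*optLevel=*/2, /*fmaContractLevel=*/2,
                                /*useFMAD=*/false, /*hasFMAF32=*/true);
      std::printf("fma.f32=%d aggressive=%d mad.f32=%d\n", f.doFMAF32,
                  f.doFMAF32AGG, f.doFMADF32);
      return 0;
    }

Note that at -O0 every flag stays false, so no contraction happens regardless of the nvptx-fma-level setting.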
@@ -55,26 +49,22 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, return new NVPTXDAGToDAGISel(TM, OptLevel); } - NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel) -: SelectionDAGISel(tm, OptLevel), - Subtarget(tm.getSubtarget<NVPTXSubtarget>()) -{ + : SelectionDAGISel(tm, OptLevel), + Subtarget(tm.getSubtarget<NVPTXSubtarget>()) { // Always do fma.f32 fpcontract if the target supports the instruction. // Always do fma.f64 fpcontract if the target supports the instruction. // Do mad.f32 is nvptx-mad-enable is specified and the target does not // support fma.f32. doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32(); - doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && - (FMAContractLevel>=1); - doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && - (FMAContractLevel>=1); - doFMAF32AGG = (OptLevel > 0) && Subtarget.hasFMAF32() && - (FMAContractLevel==2); - doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() && - (FMAContractLevel==2); + doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1); + doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1); + doFMAF32AGG = + (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2); + doFMAF64AGG = + (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2); allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction; @@ -92,10 +82,10 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. -SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) - return NULL; // Already selected. + return NULL; // Already selected. 
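The switch that follows is the custom-selection dispatch: hand-written cases handle loads, stores, and the new vector variants, and a null result falls through to the TableGen-generated matcher (SelectCode). A schematic of that pattern with stand-in types, since nothing here is LLVM API:

    #include <cstdio>

    // Stand-ins for SDNode opcodes and the selection hooks.
    enum Opcode { LOAD, STORE, LoadV2, LoadV4, OTHER };
    struct Node { Opcode op; };

    Node *selectLoad(Node *n) { return n; }       // like SelectLoad
    Node *selectLoadVector(Node *n) { return n; } // like SelectLoadVector
    Node *selectGeneric(Node *n) { return n; }    // like SelectCode

    Node *select(Node *n) {
      Node *res = nullptr;
      switch (n->op) {
      case LOAD:
        res = selectLoad(n);
        break;
      case LoadV2:
      case LoadV4:
        res = selectLoadVector(n);
        break;
      default:
        break;                 // not handled by hand
      }
      if (res)
        return res;
      return selectGeneric(n); // fall through to the generated matcher
    }

    int main() {
      Node n{LoadV2};
      std::printf("selected opcode: %d\n", select(&n)->op);
      return 0;
    }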
SDNode *ResNode = NULL; switch (N->getOpcode()) { @@ -105,29 +95,48 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) { case ISD::STORE: ResNode = SelectStore(N); break; + case NVPTXISD::LoadV2: + case NVPTXISD::LoadV4: + ResNode = SelectLoadVector(N); + break; + case NVPTXISD::LDGV2: + case NVPTXISD::LDGV4: + case NVPTXISD::LDUV2: + case NVPTXISD::LDUV4: + ResNode = SelectLDGLDUVector(N); + break; + case NVPTXISD::StoreV2: + case NVPTXISD::StoreV4: + ResNode = SelectStoreVector(N); + break; + default: + break; } if (ResNode) return ResNode; return SelectCode(N); } - -static unsigned int -getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget) -{ +static unsigned int getCodeAddrSpace(MemSDNode *N, + const NVPTXSubtarget &Subtarget) { const Value *Src = N->getSrcValue(); if (!Src) return NVPTX::PTXLdStInstCode::LOCAL; if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) { switch (PT->getAddressSpace()) { - case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; - case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; - case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; + case llvm::ADDRESS_SPACE_LOCAL: + return NVPTX::PTXLdStInstCode::LOCAL; + case llvm::ADDRESS_SPACE_GLOBAL: + return NVPTX::PTXLdStInstCode::GLOBAL; + case llvm::ADDRESS_SPACE_SHARED: + return NVPTX::PTXLdStInstCode::SHARED; case llvm::ADDRESS_SPACE_CONST_NOT_GEN: return NVPTX::PTXLdStInstCode::CONSTANT; - case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; - case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; + case llvm::ADDRESS_SPACE_GENERIC: + return NVPTX::PTXLdStInstCode::GENERIC; + case llvm::ADDRESS_SPACE_PARAM: + return NVPTX::PTXLdStInstCode::PARAM; case llvm::ADDRESS_SPACE_CONST: // If the arch supports generic address space, translate it to GLOBAL // for correctness. 
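getCodeAddrSpace, shown around this point, maps the source pointer's LLVM address space to the instruction-code flag that later prints as the ld/st space qualifier. A standalone sketch of that mapping follows; the enum values are stand-ins rather than the real NVPTX address-space numbers, and hasGenericLdSt is an assumed predicate for the subtarget check described in the comment above.

    #include <cstdio>

    // Illustrative address-space tags and PTX ld/st codes.
    enum AddrSpace { GENERIC, GLOBAL, CONST, SHARED, CONST_NOT_GEN, LOCAL,
                     PARAM };
    enum LdStCode { CodeGeneric, CodeGlobal, CodeConstant, CodeShared,
                    CodeParam, CodeLocal };

    LdStCode codeFor(AddrSpace as, bool hasGenericLdSt) {
      switch (as) {
      case LOCAL:         return CodeLocal;
      case GLOBAL:        return CodeGlobal;
      case SHARED:        return CodeShared;
      case CONST_NOT_GEN: return CodeConstant;
      case GENERIC:       return CodeGeneric;
      case PARAM:         return CodeParam;
      case CONST:
        // With generic addressing the pointer may really refer to global
        // memory, so the always-correct global form is used instead.
        return hasGenericLdSt ? CodeGlobal : CodeConstant;
      }
      return CodeLocal; // unknown or missing source value: treated as local
    }

    int main() {
      std::printf("const with generic ld/st -> %d\n",
                  codeFor(CONST, /*hasGenericLdSt=*/true));
      return 0;
    }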
@@ -138,18 +147,18 @@ getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget) return NVPTX::PTXLdStInstCode::GLOBAL; else return NVPTX::PTXLdStInstCode::CONSTANT; - default: break; + default: + break; } } return NVPTX::PTXLdStInstCode::LOCAL; } - -SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { DebugLoc dl = N->getDebugLoc(); LoadSDNode *LD = cast<LoadSDNode>(N); EVT LoadedVT = LD->getMemoryVT(); - SDNode *NVPTXLD= NULL; + SDNode *NVPTXLD = NULL; // do not support pre/post inc/dec if (LD->isIndexed()) @@ -189,7 +198,7 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { // type is integer // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float MVT ScalarVT = SimpleVT.getScalarType(); - unsigned fromTypeWidth = ScalarVT.getSizeInBits(); + unsigned fromTypeWidth = ScalarVT.getSizeInBits(); unsigned int fromType; if ((LD->getExtensionType() == ISD::SEXTLOAD)) fromType = NVPTX::PTXLdStInstCode::Signed; @@ -208,121 +217,166 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { if (SelectDirectAddr(N1, Addr)) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_avar; break; - case MVT::i16: Opcode = NVPTX::LD_i16_avar; break; - case MVT::i32: Opcode = NVPTX::LD_i32_avar; break; - case MVT::i64: Opcode = NVPTX::LD_i64_avar; break; - case MVT::f32: Opcode = NVPTX::LD_f32_avar; break; - case MVT::f64: Opcode = NVPTX::LD_f64_avar; break; - case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break; - case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break; - case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break; - case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break; - case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break; - case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break; - case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break; - case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break; - case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break; - case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_avar; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_avar; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_avar; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_avar; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_avar; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_avar; + break; + default: + return NULL; } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - Addr, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 7); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(N1.getNode(), N1, Base, Offset): - SelectADDRsi(N1.getNode(), N1, Base, Offset)) { + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), Addr, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); + } else if (Subtarget.is64Bit() + ? 
SelectADDRsi64(N1.getNode(), N1, Base, Offset) + : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_asi; break; - case MVT::i16: Opcode = NVPTX::LD_i16_asi; break; - case MVT::i32: Opcode = NVPTX::LD_i32_asi; break; - case MVT::i64: Opcode = NVPTX::LD_i64_asi; break; - case MVT::f32: Opcode = NVPTX::LD_f32_asi; break; - case MVT::f64: Opcode = NVPTX::LD_f64_asi; break; - case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break; - case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break; - case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break; - case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break; - case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break; - case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break; - case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break; - case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break; - case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break; - case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_asi; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_asi; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_asi; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_asi; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_asi; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_asi; + break; + default: + return NULL; } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 8); - } else if (Subtarget.is64Bit()? - SelectADDRri64(N1.getNode(), N1, Base, Offset): - SelectADDRri(N1.getNode(), N1, Base, Offset)) { - switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_ari; break; - case MVT::i16: Opcode = NVPTX::LD_i16_ari; break; - case MVT::i32: Opcode = NVPTX::LD_i32_ari; break; - case MVT::i64: Opcode = NVPTX::LD_i64_ari; break; - case MVT::f32: Opcode = NVPTX::LD_f32_ari; break; - case MVT::f64: Opcode = NVPTX::LD_f64_ari; break; - case MVT::v2i8: Opcode = NVPTX::LD_v2i8_ari; break; - case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break; - case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break; - case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break; - case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break; - case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break; - case MVT::v4i8: Opcode = NVPTX::LD_v4i8_ari; break; - case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break; - case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break; - case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break; - default: return NULL; + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), Base, Offset, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); + } else if (Subtarget.is64Bit() + ? 
SelectADDRri64(N1.getNode(), N1, Base, Offset) + : SelectADDRri(N1.getNode(), N1, Base, Offset)) { + if (Subtarget.is64Bit()) { + switch (TargetVT) { + case MVT::i8: + Opcode = NVPTX::LD_i8_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_ari_64; + break; + default: + return NULL; + } + } else { + switch (TargetVT) { + case MVT::i8: + Opcode = NVPTX::LD_i8_ari; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_ari; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_ari; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_ari; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_ari; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_ari; + break; + default: + return NULL; + } } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 8); - } - else { - switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_areg; break; - case MVT::i16: Opcode = NVPTX::LD_i16_areg; break; - case MVT::i32: Opcode = NVPTX::LD_i32_areg; break; - case MVT::i64: Opcode = NVPTX::LD_i64_areg; break; - case MVT::f32: Opcode = NVPTX::LD_f32_areg; break; - case MVT::f64: Opcode = NVPTX::LD_f64_areg; break; - case MVT::v2i8: Opcode = NVPTX::LD_v2i8_areg; break; - case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break; - case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break; - case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break; - case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break; - case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break; - case MVT::v4i8: Opcode = NVPTX::LD_v4i8_areg; break; - case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break; - case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break; - case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break; - default: return NULL; + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), Base, Offset, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); + } else { + if (Subtarget.is64Bit()) { + switch (TargetVT) { + case MVT::i8: + Opcode = NVPTX::LD_i8_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_areg_64; + break; + default: + return NULL; + } + } else { + switch (TargetVT) { + case MVT::i8: + Opcode = NVPTX::LD_i8_areg; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_areg; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_areg; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_areg; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_areg; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_areg; + break; + default: + return NULL; + } } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - N1, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 7); + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), 
getI32Imm(fromType), + getI32Imm(fromTypeWidth), N1, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); } if (NVPTXLD != NULL) { @@ -334,7 +388,590 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { return NVPTXLD; } -SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { + + SDValue Chain = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue Addr, Offset, Base; + unsigned Opcode; + DebugLoc DL = N->getDebugLoc(); + SDNode *LD; + MemSDNode *MemSD = cast<MemSDNode>(N); + EVT LoadedVT = MemSD->getMemoryVT(); + + if (!LoadedVT.isSimple()) + return NULL; + + // Address Space Setting + unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); + + // Volatile Setting + // - .volatile is only availalble for .global and .shared + bool IsVolatile = MemSD->isVolatile(); + if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && + CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && + CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) + IsVolatile = false; + + // Vector Setting + MVT SimpleVT = LoadedVT.getSimpleVT(); + + // Type Setting: fromType + fromTypeWidth + // + // Sign : ISD::SEXTLOAD + // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the + // type is integer + // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float + MVT ScalarVT = SimpleVT.getScalarType(); + unsigned FromTypeWidth = ScalarVT.getSizeInBits(); + unsigned int FromType; + // The last operand holds the original LoadSDNode::getExtensionType() value + unsigned ExtensionType = cast<ConstantSDNode>( + N->getOperand(N->getNumOperands() - 1))->getZExtValue(); + if (ExtensionType == ISD::SEXTLOAD) + FromType = NVPTX::PTXLdStInstCode::Signed; + else if (ScalarVT.isFloatingPoint()) + FromType = NVPTX::PTXLdStInstCode::Float; + else + FromType = NVPTX::PTXLdStInstCode::Unsigned; + + unsigned VecType; + + switch (N->getOpcode()) { + case NVPTXISD::LoadV2: + VecType = NVPTX::PTXLdStInstCode::V2; + break; + case NVPTXISD::LoadV4: + VecType = NVPTX::PTXLdStInstCode::V4; + break; + default: + return NULL; + } + + EVT EltVT = N->getValueType(0); + + if (SelectDirectAddr(Op1, Addr)) { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_avar; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_avar; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_avar; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_avar; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_avar; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_avar; + break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_avar; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_avar; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_avar; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_avar; + break; + } + break; + } + + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Addr, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); + } else if (Subtarget.is64Bit() + ? 
SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_asi; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_asi; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_asi; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_asi; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_asi; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_asi; + break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_asi; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_asi; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_asi; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_asi; + break; + } + break; + } + + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Base, Offset, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); + } else if (Subtarget.is64Bit() + ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_ari_64; + break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_ari_64; + break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_ari; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_ari; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_ari; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_ari; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_ari; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_ari; + break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_ari; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_ari; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_ari; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_ari; + break; + } + break; + } + } + + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Base, Offset, Chain }; + + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); + } else { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + 
return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_areg_64; + break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_areg_64; + break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::LoadV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_areg; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_areg; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_areg; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_areg; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_areg; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_areg; + break; + } + break; + case NVPTXISD::LoadV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_areg; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_areg; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_areg; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_areg; + break; + } + break; + } + } + + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Op1, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); + } + + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); + + return LD; +} + +SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { + + SDValue Chain = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + unsigned Opcode; + DebugLoc DL = N->getDebugLoc(); + SDNode *LD; + + EVT RetVT = N->getValueType(0); + + // Select opcode + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::LDGV2: + switch (RetVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; + break; + } + break; + case NVPTXISD::LDGV4: + switch (RetVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; + break; + } + break; + case NVPTXISD::LDUV2: + switch (RetVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; + break; + case MVT::i16: + Opcode = 
NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; + break; + } + break; + case NVPTXISD::LDUV4: + switch (RetVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; + break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::LDGV2: + switch (RetVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; + break; + } + break; + case NVPTXISD::LDGV4: + switch (RetVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; + break; + } + break; + case NVPTXISD::LDUV2: + switch (RetVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; + break; + } + break; + case NVPTXISD::LDUV4: + switch (RetVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; + break; + } + break; + } + } + + SDValue Ops[] = { Op1, Chain }; + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2); + + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); + + return LD; +} + +SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { DebugLoc dl = N->getDebugLoc(); StoreSDNode *ST = cast<StoreSDNode>(N); EVT StoreVT = ST->getMemoryVT(); @@ -375,7 +1012,7 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { // - for integer type, always use 'u' // MVT ScalarVT = SimpleVT.getScalarType(); - unsigned toTypeWidth = ScalarVT.getSizeInBits(); + unsigned toTypeWidth = ScalarVT.getSizeInBits(); unsigned int toType; if (ScalarVT.isFloatingPoint()) toType = NVPTX::PTXLdStInstCode::Float; @@ -394,124 +1031,166 @@ SDNode* 
NVPTXDAGToDAGISel::SelectStore(SDNode *N) { if (SelectDirectAddr(N2, Addr)) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_avar; break; - case MVT::i16: Opcode = NVPTX::ST_i16_avar; break; - case MVT::i32: Opcode = NVPTX::ST_i32_avar; break; - case MVT::i64: Opcode = NVPTX::ST_i64_avar; break; - case MVT::f32: Opcode = NVPTX::ST_f32_avar; break; - case MVT::f64: Opcode = NVPTX::ST_f64_avar; break; - case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break; - case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break; - case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break; - case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break; - case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break; - case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break; - case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break; - case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break; - case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break; - case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_avar; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_avar; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_avar; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_avar; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_avar; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_avar; + break; + default: + return NULL; } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - Addr, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 8); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(N2.getNode(), N2, Base, Offset): - SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), Addr, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); + } else if (Subtarget.is64Bit() + ? 
SelectADDRsi64(N2.getNode(), N2, Base, Offset) + : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_asi; break; - case MVT::i16: Opcode = NVPTX::ST_i16_asi; break; - case MVT::i32: Opcode = NVPTX::ST_i32_asi; break; - case MVT::i64: Opcode = NVPTX::ST_i64_asi; break; - case MVT::f32: Opcode = NVPTX::ST_f32_asi; break; - case MVT::f64: Opcode = NVPTX::ST_f64_asi; break; - case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break; - case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break; - case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break; - case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break; - case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break; - case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break; - case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break; - case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break; - case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break; - case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_asi; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_asi; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_asi; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_asi; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_asi; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_asi; + break; + default: + return NULL; } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - Base, Offset, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 9); - } else if (Subtarget.is64Bit()? - SelectADDRri64(N2.getNode(), N2, Base, Offset): - SelectADDRri(N2.getNode(), N2, Base, Offset)) { - switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_ari; break; - case MVT::i16: Opcode = NVPTX::ST_i16_ari; break; - case MVT::i32: Opcode = NVPTX::ST_i32_ari; break; - case MVT::i64: Opcode = NVPTX::ST_i64_ari; break; - case MVT::f32: Opcode = NVPTX::ST_f32_ari; break; - case MVT::f64: Opcode = NVPTX::ST_f64_ari; break; - case MVT::v2i8: Opcode = NVPTX::ST_v2i8_ari; break; - case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break; - case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break; - case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break; - case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break; - case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break; - case MVT::v4i8: Opcode = NVPTX::ST_v4i8_ari; break; - case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break; - case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break; - case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break; - default: return NULL; + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), Base, Offset, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); + } else if (Subtarget.is64Bit() + ? 
SelectADDRri64(N2.getNode(), N2, Base, Offset) + : SelectADDRri(N2.getNode(), N2, Base, Offset)) { + if (Subtarget.is64Bit()) { + switch (SourceVT) { + case MVT::i8: + Opcode = NVPTX::ST_i8_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_ari_64; + break; + default: + return NULL; + } + } else { + switch (SourceVT) { + case MVT::i8: + Opcode = NVPTX::ST_i8_ari; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_ari; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_ari; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_ari; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_ari; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_ari; + break; + default: + return NULL; + } } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - Base, Offset, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 9); + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), Base, Offset, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); } else { - switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_areg; break; - case MVT::i16: Opcode = NVPTX::ST_i16_areg; break; - case MVT::i32: Opcode = NVPTX::ST_i32_areg; break; - case MVT::i64: Opcode = NVPTX::ST_i64_areg; break; - case MVT::f32: Opcode = NVPTX::ST_f32_areg; break; - case MVT::f64: Opcode = NVPTX::ST_f64_areg; break; - case MVT::v2i8: Opcode = NVPTX::ST_v2i8_areg; break; - case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break; - case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break; - case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break; - case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break; - case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break; - case MVT::v4i8: Opcode = NVPTX::ST_v4i8_areg; break; - case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break; - case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break; - case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break; - default: return NULL; + if (Subtarget.is64Bit()) { + switch (SourceVT) { + case MVT::i8: + Opcode = NVPTX::ST_i8_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_areg_64; + break; + default: + return NULL; + } + } else { + switch (SourceVT) { + case MVT::i8: + Opcode = NVPTX::ST_i8_areg; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_areg; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_areg; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_areg; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_areg; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_areg; + break; + default: + return NULL; + } } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - N2, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 8); + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), 
N2, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); } if (NVPTXST != NULL) { @@ -523,12 +1202,388 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { return NVPTXST; } +SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue Addr, Offset, Base; + unsigned Opcode; + DebugLoc DL = N->getDebugLoc(); + SDNode *ST; + EVT EltVT = Op1.getValueType(); + MemSDNode *MemSD = cast<MemSDNode>(N); + EVT StoreVT = MemSD->getMemoryVT(); + + // Address Space Setting + unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); + + if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) { + report_fatal_error("Cannot store to pointer that points to constant " + "memory space"); + } + + // Volatile Setting + // - .volatile is only available for .global and .shared + bool IsVolatile = MemSD->isVolatile(); + if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && + CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && + CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) + IsVolatile = false; + + // Type Setting: toType + toTypeWidth + // - for integer type, always use 'u' + assert(StoreVT.isSimple() && "Store value is not simple"); + MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); + unsigned ToTypeWidth = ScalarVT.getSizeInBits(); + unsigned ToType; + if (ScalarVT.isFloatingPoint()) + ToType = NVPTX::PTXLdStInstCode::Float; + else + ToType = NVPTX::PTXLdStInstCode::Unsigned; + + SmallVector<SDValue, 12> StOps; + SDValue N2; + unsigned VecType; + + switch (N->getOpcode()) { + case NVPTXISD::StoreV2: + VecType = NVPTX::PTXLdStInstCode::V2; + StOps.push_back(N->getOperand(1)); + StOps.push_back(N->getOperand(2)); + N2 = N->getOperand(3); + break; + case NVPTXISD::StoreV4: + VecType = NVPTX::PTXLdStInstCode::V4; + StOps.push_back(N->getOperand(1)); + StOps.push_back(N->getOperand(2)); + StOps.push_back(N->getOperand(3)); + StOps.push_back(N->getOperand(4)); + N2 = N->getOperand(5); + break; + default: + return NULL; + } + + StOps.push_back(getI32Imm(IsVolatile)); + StOps.push_back(getI32Imm(CodeAddrSpace)); + StOps.push_back(getI32Imm(VecType)); + StOps.push_back(getI32Imm(ToType)); + StOps.push_back(getI32Imm(ToTypeWidth)); + + if (SelectDirectAddr(N2, Addr)) { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_avar; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_avar; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_avar; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_avar; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_avar; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_avar; + break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_avar; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_avar; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_avar; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_avar; + break; + } + break; + } + StOps.push_back(Addr); + } else if (Subtarget.is64Bit() + ?
SelectADDRsi64(N2.getNode(), N2, Base, Offset) + : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_asi; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_asi; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_asi; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_asi; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_asi; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_asi; + break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_asi; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_asi; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_asi; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_asi; + break; + } + break; + } + StOps.push_back(Base); + StOps.push_back(Offset); + } else if (Subtarget.is64Bit() + ? SelectADDRri64(N2.getNode(), N2, Base, Offset) + : SelectADDRri(N2.getNode(), N2, Base, Offset)) { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_ari_64; + break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_ari_64; + break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_ari; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_ari; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_ari; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_ari; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_ari; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_ari; + break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_ari; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_ari; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_ari; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_ari; + break; + } + break; + } + } + StOps.push_back(Base); + StOps.push_back(Offset); + } else { + if (Subtarget.is64Bit()) { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_areg_64; + break; + case MVT::f64: + Opcode = 
NVPTX::STV_f64_v2_areg_64; + break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_areg_64; + break; + } + break; + } + } else { + switch (N->getOpcode()) { + default: + return NULL; + case NVPTXISD::StoreV2: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_areg; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_areg; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_areg; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_areg; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_areg; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_areg; + break; + } + break; + case NVPTXISD::StoreV4: + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_areg; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_areg; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_areg; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_areg; + break; + } + break; + } + } + StOps.push_back(N2); + } + + StOps.push_back(Chain); + + ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size()); + + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1); + + return ST; +} + // SelectDirectAddr - Match a direct address for DAG. // A direct address could be a globaladdress or externalsymbol. bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { // Return true if TGA or ES. - if (N.getOpcode() == ISD::TargetGlobalAddress - || N.getOpcode() == ISD::TargetExternalSymbol) { + if (N.getOpcode() == ISD::TargetGlobalAddress || + N.getOpcode() == ISD::TargetExternalSymbol) { Address = N; return true; } @@ -546,12 +1601,11 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { } // symbol+offset -bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset, - MVT mvt) { +bool NVPTXDAGToDAGISel::SelectADDRsi_imp( + SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - SDValue base=Addr.getOperand(0); + SDValue base = Addr.getOperand(0); if (SelectDirectAddr(base, Base)) { Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt); return true; @@ -574,9 +1628,8 @@ bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr, } // register+offset -bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset, - MVT mvt) { +bool NVPTXDAGToDAGISel::SelectADDRri_imp( + SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); Offset = CurDAG->getTargetConstant(0, mvt); @@ -584,7 +1637,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, } if (Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // direct calls. + return false; // direct calls. 
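The load and store selectors above commit to one of four PTX addressing forms in a fixed order: a bare symbol becomes the _avar form, symbol plus constant offset becomes _asi, register plus constant offset becomes _ari, and anything else falls through to the plain-register _areg form. A minimal standalone sketch of that cascade, in plain C++ with illustrative names rather than the real SDValue matchers (which also handle frame indices):

#include <cstdio>

// Hypothetical stand-in for the operand facts that SelectDirectAddr /
// SelectADDRsi / SelectADDRri extract from the DAG.
struct AddrFacts { bool isSymbol; bool hasConstOffset; };

static const char *chooseForm(AddrFacts a) {
  if (a.isSymbol && !a.hasConstOffset) return "avar"; // direct symbol
  if (a.isSymbol && a.hasConstOffset)  return "asi";  // symbol + immediate
  if (a.hasConstOffset)                return "ari";  // register + immediate
  return "areg";                                      // plain register
}

int main() {
  AddrFacts sym = { true, false }, regimm = { false, true };
  std::printf("%s %s\n", chooseForm(sym), chooseForm(regimm)); // avar ari
}

The order matters: the more specific forms fold the address computation into the instruction, so they are tried before the plain-register fallback.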
if (Addr.getOpcode() == ISD::ADD) { if (SelectDirectAddr(Addr.getOperand(0), Addr)) { @@ -592,7 +1645,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, } if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { if (FrameIndexSDNode *FIN = - dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) + dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) // Constant offset from frame ref. Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); else @@ -624,8 +1677,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, // (See SelectionDAGNodes.h). So we need to check for both. if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { Src = mN->getSrcValue(); - } - else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) { + } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) { Src = mN->getSrcValue(); } if (!Src) @@ -637,13 +1689,13 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. -bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps) { +bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { SDValue Op0, Op1; switch (ConstraintCode) { - default: return true; - case 'm': // memory + default: + return true; + case 'm': // memory if (SelectDirectAddr(Op, Op0)) { OutOps.push_back(Op0); OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); @@ -666,10 +1718,8 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, // pattern matcher inserts a bunch of IMOVi8rr to convert // the imm to i8imm, and this causes instruction selection // to fail. -bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, - SDValue &Retval) { - if (!(N.getOpcode() == ISD::UNDEF) && - !(N.getOpcode() == ISD::Constant)) +bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) { + if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant)) return false; if (N.getOpcode() == ISD::UNDEF) diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index ccd69b29dd42..70e8e464297d 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -18,8 +18,8 @@ #include "NVPTXRegisterInfo.h" #include "NVPTXTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Compiler.h" -#include "llvm/Intrinsics.h" using namespace llvm; namespace { @@ -64,16 +64,18 @@ public: const NVPTXSubtarget &Subtarget; - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); + virtual bool SelectInlineAsmMemoryOperand( + const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); private: - // Include the pieces autogenerated from the target description. +// Include the pieces autogenerated from the target description. 
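The header now declares the vector selectors used above (SelectLoadVector, SelectStoreVector, SelectLDGLDUVector). Their large switches enumerate machine opcodes whose names follow a single grid: operation, lane count, element type, pointer width. A hedged sketch that composes such a name, assuming the pattern visible in the switches holds for every combination (plain C++, no LLVM dependency):

#include <cstdio>
#include <cstring>

// Compose an LDG/LDU opcode name the way the switches above spell them:
// INT_PTX_{LDG,LDU}_G_v{2,4}<elt>_ELE_{32,64}. Returns NULL for the
// combinations the selectors reject (there is no v4 form for 64-bit
// element types).
static const char *ldgOpcodeName(char *buf, unsigned n, bool isLDG,
                                 int lanes, const char *elt, bool is64) {
  if (lanes == 4 && (!std::strcmp(elt, "i64") || !std::strcmp(elt, "f64")))
    return NULL;
  std::snprintf(buf, n, "INT_PTX_%s_G_v%d%s_ELE_%d", isLDG ? "LDG" : "LDU",
                lanes, elt, is64 ? 64 : 32);
  return buf;
}

int main() {
  char buf[64];
  std::printf("%s\n", ldgOpcodeName(buf, sizeof buf, true, 2, "f32", true));
  // prints: INT_PTX_LDG_G_v2f32_ELE_64
}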
#include "NVPTXGenDAGISel.inc" SDNode *Select(SDNode *N); - SDNode* SelectLoad(SDNode *N); - SDNode* SelectStore(SDNode *N); + SDNode *SelectLoad(SDNode *N); + SDNode *SelectLoadVector(SDNode *N); + SDNode *SelectLDGLDUVector(SDNode *N); + SDNode *SelectStore(SDNode *N); + SDNode *SelectStoreVector(SDNode *N); inline SDValue getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); @@ -96,7 +98,6 @@ private: bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset); - bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index f1a99d77be9d..6e01a5a82071 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -11,30 +11,29 @@ // //===----------------------------------------------------------------------===// - -#include "NVPTX.h" #include "NVPTXISelLowering.h" +#include "NVPTX.h" #include "NVPTXTargetMachine.h" #include "NVPTXTargetObjectFile.h" #include "NVPTXUtilities.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalValue.h" -#include "llvm/Module.h" -#include "llvm/Function.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/MC/MCSectionELF.h" #include <sstream> #undef DEBUG_TYPE @@ -44,28 +43,39 @@ using namespace llvm; static unsigned int uniqueCallSite = 0; -static cl::opt<bool> -RetainVectorOperands("nvptx-codegen-vectors", - cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"), - cl::init(true)); +static cl::opt<bool> sched4reg( + "nvptx-sched4reg", + cl::desc("NVPTX Specific: schedule for register pressure"), cl::init(false)); -static cl::opt<bool> -sched4reg("nvptx-sched4reg", - cl::desc("NVPTX Specific: schedule for register pressue"), - cl::init(false)); +static bool IsPTXVectorType(MVT VT) { + switch (VT.SimpleTy) { + default: + return false; + case MVT::v2i8: + case MVT::v4i8: + case MVT::v2i16: + case MVT::v4i16: + case MVT::v2i32: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v2f32: + case MVT::v4f32: + case MVT::v2f64: + return true; + } +} // NVPTXTargetLowering Constructor. NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) -: TargetLowering(TM, new NVPTXTargetObjectFile()), - nvTM(&TM), - nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { + : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), + nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { // always lower memset, memcpy, and memmove intrinsics to load/store // instructions, rather // then generating calls to memset, mempcy or memmove.
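The new IsPTXVectorType predicate above whitelists exactly the vector shapes the custom lowering handles; everything it accepts is at most 128 bits wide (v2i64, v2f64, v4i32, v4f32), which is presumably why v4i64 and v4f64 are absent. The same predicate, mirrored as a self-contained sketch with string tags standing in for MVT values:

#include <cstdio>
#include <set>
#include <string>

// Self-contained mirror of IsPTXVectorType: the ten simple vector types
// that map onto PTX v2/v4 load/store forms.
static bool isPTXVectorType(const std::string &vt) {
  static const std::set<std::string> supported = {
    "v2i8", "v4i8", "v2i16", "v4i16", "v2i32", "v4i32",
    "v2i64", "v2f32", "v4f32", "v2f64"
  };
  return supported.count(vt) != 0;
}

int main() {
  std::printf("%d\n", isPTXVectorType("v4f32")); // 1
  std::printf("%d\n", isPTXVectorType("v4i64")); // 0: would be 256 bits wide
}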
- maxStoresPerMemset = (unsigned)0xFFFFFFFF; - maxStoresPerMemcpy = (unsigned)0xFFFFFFFF; - maxStoresPerMemmove = (unsigned)0xFFFFFFFF; + MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; + MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; + MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; setBooleanContents(ZeroOrNegativeOneBooleanContent); @@ -87,82 +97,51 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); - if (RetainVectorOperands) { - addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass); - addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass); - addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass); - addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass); - addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass); - addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass); - addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass); - addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass); - addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass); - addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass); - - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16 , Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8 , Custom); - - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16 , Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8 , Custom); - } - // Operations not directly supported by NVPTX. 
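As the adjacent comment says, PTX has no C library to call into, so the three thresholds are pinned at 0xFFFFFFFF: a memset/memcpy/memmove intrinsic is always inlined as explicit load/store instructions, never turned into a library call. A toy model of the threshold check (the real decision lives in the generic SelectionDAG memory-op lowering):

#include <cstdio>

// The generic lowering inlines a memset as N stores only when N does not
// exceed the target's threshold; with the threshold at 0xFFFFFFFF the
// library-call path is effectively unreachable.
static bool inlineAsStores(unsigned storesNeeded, unsigned maxStores) {
  return storesNeeded <= maxStores;
}

int main() {
  const unsigned MaxStoresPerMemset = 0xFFFFFFFFu;
  std::printf("%d\n", inlineAsStores(1u << 20, MaxStoresPerMemset)); // 1
}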
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::i1, Expand); + setOperationAction(ISD::BR_CC, MVT::i8, Expand); + setOperationAction(ISD::BR_CC, MVT::i16, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (nvptxSubtarget.hasROT64()) { - setOperationAction(ISD::ROTL , MVT::i64, Legal); - setOperationAction(ISD::ROTR , MVT::i64, Legal); - } - else { - setOperationAction(ISD::ROTL , MVT::i64, Expand); - setOperationAction(ISD::ROTR , MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Legal); + setOperationAction(ISD::ROTR, MVT::i64, Legal); + } else { + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i64, Expand); } if (nvptxSubtarget.hasROT32()) { - setOperationAction(ISD::ROTL , MVT::i32, Legal); - setOperationAction(ISD::ROTR , MVT::i32, Legal); - } - else { - setOperationAction(ISD::ROTL , MVT::i32, Expand); - setOperationAction(ISD::ROTR , MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Legal); + setOperationAction(ISD::ROTR, MVT::i32, Legal); + } else { + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Expand); } - setOperationAction(ISD::ROTL , MVT::i16, Expand); - setOperationAction(ISD::ROTR , MVT::i16, Expand); - setOperationAction(ISD::ROTL , MVT::i8, Expand); - setOperationAction(ISD::ROTR , MVT::i8, Expand); - setOperationAction(ISD::BSWAP , MVT::i16, Expand); - setOperationAction(ISD::BSWAP , MVT::i32, Expand); - setOperationAction(ISD::BSWAP , MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i16, Expand); + setOperationAction(ISD::ROTR, MVT::i16, Expand); + setOperationAction(ISD::ROTL, MVT::i8, Expand); + setOperationAction(ISD::ROTR, MVT::i8, Expand); + setOperationAction(ISD::BSWAP, MVT::i16, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i64, Expand); // Indirect branch is not supported. // This also disables Jump Table creation. - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); - setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); - setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); // We want to legalize constant related memmove and memcopy // intrinsics. 
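ROTL/ROTR above are marked Legal only when the subtarget reports native rotate support (hasROT32/hasROT64); otherwise Expand lets the legalizer rewrite a rotate as two shifts and an OR. Roughly what that expansion computes, in ordinary C++:

#include <cstdint>
#include <cstdio>

// What Expand produces for a 32-bit ROTL on targets without a native
// rotate: shift left, shift right by the complement, OR the two halves.
// The masking keeps the shift amounts in range for n == 0.
static uint32_t rotl32(uint32_t x, unsigned n) {
  n &= 31;
  return (x << n) | (x >> ((32 - n) & 31));
}

int main() {
  std::printf("%08x\n", rotl32(0x80000001u, 1)); // 00000003
}

The i8/i16 rotates and all BSWAPs stay Expand unconditionally, since PTX has no narrow rotate or byte-swap instruction to map them to.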
@@ -185,92 +164,114 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setTruncStoreAction(MVT::i8, MVT::i1, Expand); // This is legal in NVPTX - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // TRAP can be lowered to PTX trap - setOperationAction(ISD::TRAP, MVT::Other, Legal); - - // By default, CONCAT_VECTORS is implemented via store/load - // through stack. It is slow and uses local memory. We need - // to custom-lowering them. - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom); - - // Expand vector int to float and float to int conversions - // - For SINT_TO_FP and UINT_TO_FP, the src type - // (Node->getOperand(0).getValueType()) - // is used to determine the action, while for FP_TO_UINT and FP_TO_SINT, - // the dest type (Node->getValueType(0)) is used. - // - // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector - // case, and - // SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case. - // - // That is why v4i32 or v2i32 are used here. - // - // The expansion for vectors happens in VectorLegalizer::LegalizeOp() - // (LegalizeVectorOps.cpp). 
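The block removed below documented how vector int/float conversions were expanded: the vector legalizer unrolls them into per-element scalar conversions, which is also what happens now that the dedicated vector register classes are gone. In miniature, what an expanded v4i32 to v4f32 SINT_TO_FP computes, hand-unrolled as ordinary C++:

#include <cstdio>

// Hand-unrolled view of a scalarized v4i32 -> v4f32 conversion: four
// independent int-to-float conversions, one per lane.
int main() {
  int   src[4] = { -1, 0, 2, 7 };
  float dst[4];
  for (int i = 0; i < 4; ++i)
    dst[i] = static_cast<float>(src[i]); // roughly one cvt per lane in PTX
  std::printf("%g %g %g %g\n", dst[0], dst[1], dst[2], dst[3]); // -1 0 2 7
}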
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); + setOperationAction(ISD::TRAP, MVT::Other, Legal); + + // Register custom handling for vector loads/stores + for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; + ++i) { + MVT VT = (MVT::SimpleValueType) i; + if (IsPTXVectorType(VT)) { + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); + } + } // Now deduce the information based on the above mentioned // actions computeRegisterProperties(); } - const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; - case NVPTXISD::CALL: return "NVPTXISD::CALL"; - case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG"; - case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper"; - case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin"; - case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam"; + default: + return 0; + case NVPTXISD::CALL: + return "NVPTXISD::CALL"; + case NVPTXISD::RET_FLAG: + return "NVPTXISD::RET_FLAG"; + case NVPTXISD::Wrapper: + return "NVPTXISD::Wrapper"; + case NVPTXISD::NVBuiltin: + return "NVPTXISD::NVBuiltin"; + case NVPTXISD::DeclareParam: + return "NVPTXISD::DeclareParam"; case NVPTXISD::DeclareScalarParam: return "NVPTXISD::DeclareScalarParam"; - case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet"; - case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam"; - case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall"; - case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam"; - case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam"; - case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32"; - case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32"; - case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam"; - case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin"; - case NVPTXISD::CallArg: return "NVPTXISD::CallArg"; - case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg"; - case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd"; - case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid"; - case NVPTXISD::CallVal: return "NVPTXISD::CallVal"; - case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol"; - case NVPTXISD::Prototype: return "NVPTXISD::Prototype"; - case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam"; - case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval"; - case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval"; - case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval"; - case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam"; - case NVPTXISD::RETURN: return "NVPTXISD::RETURN"; - case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin"; - case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd"; + case NVPTXISD::DeclareRet: + return "NVPTXISD::DeclareRet"; + case NVPTXISD::DeclareRetParam: + return "NVPTXISD::DeclareRetParam"; + case NVPTXISD::PrintCall: + return "NVPTXISD::PrintCall"; + case NVPTXISD::LoadParam: + return "NVPTXISD::LoadParam"; + case NVPTXISD::StoreParam: + return "NVPTXISD::StoreParam"; + case 
NVPTXISD::StoreParamS32: + return "NVPTXISD::StoreParamS32"; + case NVPTXISD::StoreParamU32: + return "NVPTXISD::StoreParamU32"; + case NVPTXISD::MoveToParam: + return "NVPTXISD::MoveToParam"; + case NVPTXISD::CallArgBegin: + return "NVPTXISD::CallArgBegin"; + case NVPTXISD::CallArg: + return "NVPTXISD::CallArg"; + case NVPTXISD::LastCallArg: + return "NVPTXISD::LastCallArg"; + case NVPTXISD::CallArgEnd: + return "NVPTXISD::CallArgEnd"; + case NVPTXISD::CallVoid: + return "NVPTXISD::CallVoid"; + case NVPTXISD::CallVal: + return "NVPTXISD::CallVal"; + case NVPTXISD::CallSymbol: + return "NVPTXISD::CallSymbol"; + case NVPTXISD::Prototype: + return "NVPTXISD::Prototype"; + case NVPTXISD::MoveParam: + return "NVPTXISD::MoveParam"; + case NVPTXISD::MoveRetval: + return "NVPTXISD::MoveRetval"; + case NVPTXISD::MoveToRetval: + return "NVPTXISD::MoveToRetval"; + case NVPTXISD::StoreRetval: + return "NVPTXISD::StoreRetval"; + case NVPTXISD::PseudoUseParam: + return "NVPTXISD::PseudoUseParam"; + case NVPTXISD::RETURN: + return "NVPTXISD::RETURN"; + case NVPTXISD::CallSeqBegin: + return "NVPTXISD::CallSeqBegin"; + case NVPTXISD::CallSeqEnd: + return "NVPTXISD::CallSeqEnd"; + case NVPTXISD::LoadV2: + return "NVPTXISD::LoadV2"; + case NVPTXISD::LoadV4: + return "NVPTXISD::LoadV4"; + case NVPTXISD::LDGV2: + return "NVPTXISD::LDGV2"; + case NVPTXISD::LDGV4: + return "NVPTXISD::LDGV4"; + case NVPTXISD::LDUV2: + return "NVPTXISD::LDUV2"; + case NVPTXISD::LDUV4: + return "NVPTXISD::LDUV4"; + case NVPTXISD::StoreV2: + return "NVPTXISD::StoreV2"; + case NVPTXISD::StoreV4: + return "NVPTXISD::StoreV4"; } } +bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const { + return VT == MVT::i1; +} SDValue NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { @@ -280,10 +281,9 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); } -std::string NVPTXTargetLowering::getPrototype(Type *retTy, - const ArgListTy &Args, - const SmallVectorImpl<ISD::OutputArg> &Outs, - unsigned retAlignment) const { +std::string NVPTXTargetLowering::getPrototype( + Type *retTy, const ArgListTy &Args, + const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const { bool isABI = (nvptxSubtarget.getSmVersion() >= 20); @@ -299,54 +299,47 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, unsigned size = 0; if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { size = ITy->getBitWidth(); - if (size < 32) size = 32; - } - else { + if (size < 32) + size = 32; + } else { assert(retTy->isFloatingPointTy() && "Floating point type expected here"); size = retTy->getPrimitiveSizeInBits(); } O << ".param .b" << size << " _"; - } - else if (isa<PointerType>(retTy)) - O << ".param .b" << getPointerTy().getSizeInBits() - << " _"; + } else if (isa<PointerType>(retTy)) + O << ".param .b" << getPointerTy().getSizeInBits() << " _"; else { if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, retTy, vtparts); unsigned totalsz = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { elems = vtparts[i].getVectorNumElements(); elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); 
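getPrototype renders the PTX-level signature string for an indirect call, later emitted through an INLINEASM node; the hunks above show its widening rules: scalar integer returns and parameters are padded to at least .b32 under the ABI, pointers take the pointer width, and aggregates become sized .b8 arrays. A rough standalone reconstruction for a hypothetical i16 f(i8, float*) call on a 64-bit target, modeling only the individual .param declarations (the surrounding prototype framing is not shown in these hunks and is omitted):

#include <iostream>
#include <sstream>

int main() {
  // Illustrative call: i16 f(i8, float*) on a 64-bit target.
  unsigned retBits = 16, argBits[] = { 8, 64 };

  std::ostringstream O;
  // Return value: integers narrower than 32 bits are widened to .b32
  // (the `if (size < 32) size = 32;` rule above).
  O << ".param .b" << (retBits < 32 ? 32 : retBits) << " _\n";
  for (unsigned i = 0; i < 2; ++i) {
    // Integer params get the same widening; a pointer param takes the
    // pointer width (64 here), per the thePointerTy branch above.
    unsigned sz = argBits[i] < 32 ? 32 : argBits[i];
    O << ".param .b" << sz << " _\n";
  }
  std::cout << O.str();
  // .param .b32 _
  // .param .b32 _
  // .param .b64 _
}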
- if (elemtype.isInteger() && (sz < 8)) sz = 8; - totalsz += sz/8; + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + totalsz += sz / 8; } } - O << ".param .align " - << retAlignment - << " .b8 _[" - << totalsz << "]"; - } - else { - assert(false && - "Unknown return type"); + O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]"; + } else { + assert(false && "Unknown return type"); } } - } - else { + } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, retTy, vtparts); unsigned idx = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -354,14 +347,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << ".reg .b" << sz << " _"; - if (j<je-1) O << ", "; + if (j < je - 1) + O << ", "; ++idx; } - if (i < e-1) + if (i < e - 1) O << ", "; } } @@ -372,7 +367,7 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, bool first = true; MVT thePointerTy = getPointerTy(); - for (unsigned i=0,e=Args.size(); i!=e; ++i) { + for (unsigned i = 0, e = Args.size(); i != e; ++i) { const Type *Ty = Args[i].Ty; if (!first) { O << ", "; @@ -383,9 +378,9 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); - if (sz < 32) sz = 32; - } - else if (isa<PointerType>(Ty)) + if (sz < 32) + sz = 32; + } else if (isa<PointerType>(Ty)) sz = thePointerTy.getSizeInBits(); else sz = Ty->getPrimitiveSizeInBits(); @@ -397,23 +392,20 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, continue; } const PointerType *PTy = dyn_cast<PointerType>(Ty); - assert(PTy && - "Param with byval attribute should be a pointer type"); + assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); if (isABI) { unsigned align = Outs[i].Flags.getByValAlign(); unsigned sz = getDataLayout()->getTypeAllocSize(ETy); - O << ".param .align " << align - << " .b8 "; + O << ".param .align " << align << " .b8 "; O << "_"; O << "[" << sz << "]"; continue; - } - else { + } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, ETy, vtparts); - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -421,14 +413,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0,je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << ".reg .b" << sz << " "; O << "_"; - if (j<je-1) O << ", "; + if (j < je - 1) + O << ", "; } - if (i<e-1) + if (i < e - 1) O << ", "; } continue; @@ -438,27 +432,25 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, return O.str(); } - -SDValue -NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const { - SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; +SDValue 
NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; - SmallVector<SDValue, 32> &OutVals = CLI.OutVals; - SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - bool &isTailCall = CLI.IsTailCall; - ArgListTy &Args = CLI.Args; - Type *retTy = CLI.RetTy; - ImmutableCallSite *CS = CLI.CS; + SmallVector<SDValue, 32> &OutVals = CLI.OutVals; + SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + ArgListTy &Args = CLI.Args; + Type *retTy = CLI.RetTy; + ImmutableCallSite *CS = CLI.CS; bool isABI = (nvptxSubtarget.getSmVersion() >= 20); SDValue tempChain = Chain; - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getIntPtrConstant(uniqueCallSite, true)); + Chain = + DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true)); SDValue InFlag = Chain.getValue(1); assert((Outs.size() == Args.size()) && @@ -466,7 +458,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned paramCount = 0; // Declare the .params or .reg need to pass values // to the function - for (unsigned i=0, e=Outs.size(); i!=e; ++i) { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { EVT VT = Outs[i].VT; if (Outs[i].Flags.isByVal() == false) { @@ -477,19 +469,20 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isABI) isReg = 0; unsigned sz = VT.getSizeInBits(); - if (VT.isInteger() && (sz < 32)) sz = 32; + if (VT.isInteger() && (sz < 32)) + sz = 32; SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), - DAG.getConstant(isReg, MVT::i32), - InFlag }; + DAG.getConstant(isReg, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(0, MVT::i32), OutVals[i], InFlag }; + DAG.getConstant(0, MVT::i32), OutVals[i], + InFlag }; unsigned opcode = NVPTXISD::StoreParam; if (isReg) @@ -509,8 +502,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // struct or vector SmallVector<EVT, 16> vtparts; const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); - assert(PTy && - "Type of a byval parameter should be pointer"); + assert(PTy && "Type of a byval parameter should be pointer"); ComputeValueVTs(*this, PTy->getElementType(), vtparts); if (isABI) { @@ -520,40 +512,41 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // The ByValAlign in the Outs[i].Flags is alway set at this point, so we // don't need to // worry about natural alignment or not. 
See TargetLowering::LowerCallTo() - SDValue DeclareParamOps[] = { Chain, - DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32), - DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(sz, MVT::i32), - InFlag }; + SDValue DeclareParamOps[] = { + Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32), + DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), + InFlag + }; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); unsigned curOffset = 0; - for (unsigned j=0,je=vtparts.size(); j!=je; ++j) { + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned elems = 1; EVT elemtype = vtparts[j]; if (vtparts[j].isVector()) { elems = vtparts[j].getVectorNumElements(); elemtype = vtparts[j].getVectorElementType(); } - for (unsigned k=0,ke=elems; k!=ke; ++k) { + for (unsigned k = 0, ke = elems; k != ke; ++k) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) sz = 8; - SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), - OutVals[i], - DAG.getConstant(curOffset, - getPointerTy())); - SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, - MachinePointerInfo(), false, false, false, 0); + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + SDValue srcAddr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], + DAG.getConstant(curOffset, getPointerTy())); + SDValue theVal = + DAG.getLoad(elemtype, dl, tempChain, srcAddr, + MachinePointerInfo(), false, false, false, 0); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, - MVT::i32), - DAG.getConstant(curOffset, MVT::i32), - theVal, InFlag }; + SDValue CopyParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(curOffset, MVT::i32), + theVal, InFlag }; Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, 5); InFlag = Chain.getValue(1); - curOffset += sz/8; + curOffset += sz / 8; } } ++paramCount; @@ -562,30 +555,31 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Non-abi, struct or vector // Declare a bunch or .reg .b<size> .param<n> unsigned curOffset = 0; - for (unsigned j=0,je=vtparts.size(); j!=je; ++j) { + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned elems = 1; EVT elemtype = vtparts[j]; if (vtparts[j].isVector()) { elems = vtparts[j].getVectorNumElements(); elemtype = vtparts[j].getVectorElementType(); } - for (unsigned k=0,ke=elems; k!=ke; ++k) { + for (unsigned k = 0, ke = elems; k != ke; ++k) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount, - MVT::i32), - DAG.getConstant(sz, MVT::i32), - DAG.getConstant(1, MVT::i32), - InFlag }; + SDValue DeclareParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(sz, MVT::i32), + DAG.getConstant(1, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); - SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], - DAG.getConstant(curOffset, - getPointerTy())); - SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, - MachinePointerInfo(), false, false, false, 0); + SDValue srcAddr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], 
+ DAG.getConstant(curOffset, getPointerTy())); + SDValue theVal = + DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(), + false, false, false, 0); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(0, MVT::i32), theVal, @@ -610,20 +604,21 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or // individual .reg .b<size> func_retval<0..> for non ABI unsigned resultsz = 0; - for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = resvtparts[i]; if (resvtparts[i].isVector()) { elems = resvtparts[i].getVectorNumElements(); elemtype = resvtparts[i].getVectorElementType(); } - for (unsigned j=0,je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); if (isABI == false) { - if (elemtype.isInteger() && (sz < 32)) sz = 32; - } - else { - if (elemtype.isInteger() && (sz < 8)) sz = 8; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; + } else { + if (elemtype.isInteger() && (sz < 8)) + sz = 8; } if (isABI == false) { SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); @@ -641,7 +636,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (isABI) { if (retTy->isPrimitiveType() || retTy->isIntegerTy() || - retTy->isPointerTy() ) { + retTy->isPointerTy()) { // Scalar needs to be at least 32bit wide if (resultsz < 32) resultsz = 32; @@ -652,8 +647,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, DeclareRetOps, 5); InFlag = Chain.getValue(1); - } - else { + } else { if (Func) { // direct call if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment)) retAlignment = getDataLayout()->getABITypeAlignment(retTy); @@ -663,10 +657,10 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, retAlignment = getDataLayout()->getABITypeAlignment(retTy); } SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment, - MVT::i32), - DAG.getConstant(resultsz/8, MVT::i32), - DAG.getConstant(0, MVT::i32), InFlag }; + SDValue DeclareRetOps[] = { Chain, + DAG.getConstant(retAlignment, MVT::i32), + DAG.getConstant(resultsz / 8, MVT::i32), + DAG.getConstant(0, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, DeclareRetOps, 5); InFlag = Chain.getValue(1); @@ -684,24 +678,24 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // INLINEASM SDNode. 
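Everything LowerCall emits in this stretch, DeclareParam, StoreParam, the return declaration, PrintCall, the CallArg nodes, and the prototype, is a pseudo-node threaded on a chain plus a glue value (InFlag), so instruction selection cannot reorder the printed call syntax. A caricature of that threading in plain C++; the node names echo the NVPTXISD opcodes, and the integers merely stand in for the SDValue chain and glue edges:

#include <cstdio>

// Each pseudo-node consumes the previous (Chain, InFlag) pair and yields
// a fresh one; the glue edge is what keeps the emitted lines in order.
struct Link { int chain, glue; };

static Link emit(const char *node, Link in) {
  std::printf("%-14s (chain %d, glue %d)\n", node, in.chain, in.glue);
  return Link{ in.chain + 1, in.glue + 1 };
}

int main() {
  Link l = { 0, 0 };
  l = emit("DeclareParam", l);
  l = emit("StoreParam", l);
  l = emit("PrintCall", l);
  l = emit("CallArgBegin", l);
  l = emit("LastCallArg", l);
  l = emit("CallArgEnd", l);
}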
SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue); std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment); - const char *asmstr = nvTM->getManagedStrPool()-> - getManagedString(proto_string.c_str())->c_str(); - SDValue InlineAsmOps[] = { Chain, - DAG.getTargetExternalSymbol(asmstr, - getPointerTy()), - DAG.getMDNode(0), - DAG.getTargetConstant(0, MVT::i32), InFlag }; + const char *asmstr = nvTM->getManagedStrPool() + ->getManagedString(proto_string.c_str())->c_str(); + SDValue InlineAsmOps[] = { + Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()), + DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag + }; Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5); InFlag = Chain.getValue(1); } // Op to just print "call" SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue PrintCallOps[] = { Chain, - DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1) - : retCount, MVT::i32), - InFlag }; - Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl, - PrintCallVTs, PrintCallOps, 3); + SDValue PrintCallOps[] = { + Chain, + DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32), + InFlag + }; + Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), + dl, PrintCallVTs, PrintCallOps, 3); InFlag = Chain.getValue(1); // Ops to print out the function name @@ -717,31 +711,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallArgBeginOps, 2); InFlag = Chain.getValue(1); - for (unsigned i=0, e=paramCount; i!=e; ++i) { + for (unsigned i = 0, e = paramCount; i != e; ++i) { unsigned opcode; - if (i==(e-1)) + if (i == (e - 1)) opcode = NVPTXISD::LastCallArg; else opcode = NVPTXISD::CallArg; SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), - DAG.getConstant(i, MVT::i32), - InFlag }; + DAG.getConstant(i, MVT::i32), InFlag }; Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4); InFlag = Chain.getValue(1); } SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CallArgEndOps[] = { Chain, - DAG.getConstant(Func ? 1 : 0, MVT::i32), + SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 
1 : 0, MVT::i32), InFlag }; - Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, - 3); + Chain = + DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3); InFlag = Chain.getValue(1); if (!Func) { SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue PrototypeOps[] = { Chain, - DAG.getConstant(uniqueCallSite, MVT::i32), + SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3); InFlag = Chain.getValue(1); @@ -751,33 +742,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Ins.size() > 0) { if (isABI) { unsigned resoffset = 0; - for (unsigned i=0,e=Ins.size(); i!=e; ++i) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { unsigned sz = Ins[i].VT.getSizeInBits(); - if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8; - std::vector<EVT> LoadRetVTs; - LoadRetVTs.push_back(Ins[i].VT); - LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); - std::vector<SDValue> LoadRetOps; - LoadRetOps.push_back(Chain); - LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); - LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32)); - LoadRetOps.push_back(InFlag); + if (Ins[i].VT.isInteger() && (sz < 8)) + sz = 8; + EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue }; + SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(resoffset, MVT::i32), InFlag }; SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs, - &LoadRetOps[0], LoadRetOps.size()); + LoadRetOps, array_lengthof(LoadRetOps)); Chain = retval.getValue(1); InFlag = retval.getValue(2); InVals.push_back(retval); - resoffset += sz/8; + resoffset += sz / 8; } - } - else { + } else { SmallVector<EVT, 16> resvtparts; ComputeValueVTs(*this, retTy, resvtparts); assert(Ins.size() == resvtparts.size() && "Unexpected number of return values in non-ABI case"); unsigned paramNum = 0; - for (unsigned i=0,e=Ins.size(); i!=e; ++i) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(EVT(Ins[i].VT) == resvtparts[i] && "Unexpected EVT type in non-ABI case"); unsigned numelems = 1; @@ -787,17 +773,13 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, elemtype = Ins[i].VT.getVectorElementType(); } std::vector<SDValue> tempRetVals; - for (unsigned j=0; j<numelems; ++j) { - std::vector<EVT> MoveRetVTs; - MoveRetVTs.push_back(elemtype); - MoveRetVTs.push_back(MVT::Other); MoveRetVTs.push_back(MVT::Glue); - std::vector<SDValue> MoveRetOps; - MoveRetOps.push_back(Chain); - MoveRetOps.push_back(DAG.getConstant(0, MVT::i32)); - MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32)); - MoveRetOps.push_back(InFlag); + for (unsigned j = 0; j < numelems; ++j) { + EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue }; + SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32), + DAG.getConstant(paramNum, MVT::i32), + InFlag }; SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs, - &MoveRetOps[0], MoveRetOps.size()); + MoveRetOps, array_lengthof(MoveRetOps)); Chain = retval.getValue(1); InFlag = retval.getValue(2); tempRetVals.push_back(retval); @@ -811,9 +793,8 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } } - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(uniqueCallSite, true), - DAG.getIntPtrConstant(uniqueCallSite+1, true), + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), + DAG.getIntPtrConstant(uniqueCallSite + 
1, true), InFlag); uniqueCallSite++; @@ -826,76 +807,183 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() // (see LegalizeDAG.cpp). This is slow and uses local memory. // We use extract/insert/build vector just as LegalizeOp() does in llvm 2.5 -SDValue NVPTXTargetLowering:: -LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { +SDValue +NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); SmallVector<SDValue, 8> Ops; unsigned NumOperands = Node->getNumOperands(); - for (unsigned i=0; i < NumOperands; ++i) { + for (unsigned i = 0; i < NumOperands; ++i) { SDValue SubOp = Node->getOperand(i); EVT VVT = SubOp.getNode()->getValueType(0); EVT EltVT = VVT.getVectorElementType(); unsigned NumSubElem = VVT.getVectorNumElements(); - for (unsigned j=0; j < NumSubElem; ++j) { + for (unsigned j = 0; j < NumSubElem; ++j) { Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, DAG.getIntPtrConstant(j))); } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0], + Ops.size()); } -SDValue NVPTXTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) const { +SDValue +NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::RETURNADDR: return SDValue(); - case ISD::FRAMEADDR: return SDValue(); - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: return Op; + case ISD::RETURNADDR: + return SDValue(); + case ISD::FRAMEADDR: + return SDValue(); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return Op; case ISD::BUILD_VECTOR: case ISD::EXTRACT_SUBVECTOR: return Op; - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); - case ISD::STORE: return LowerSTORE(Op, DAG); - case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::CONCAT_VECTORS: + return LowerCONCAT_VECTORS(Op, DAG); + case ISD::STORE: + return LowerSTORE(Op, DAG); + case ISD::LOAD: + return LowerLOAD(Op, DAG); default: llvm_unreachable("Custom lowering not defined for operation"); } } +SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType() == MVT::i1) + return LowerLOADi1(Op, DAG); + else + return SDValue(); +} // v = ld i1* addr // => // v1 = ld i8* addr // v = trunc v1 to i1 -SDValue NVPTXTargetLowering:: -LowerLOAD(SDValue Op, SelectionDAG &DAG) const { +SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); LoadSDNode *LD = cast<LoadSDNode>(Node); DebugLoc dl = Node->getDebugLoc(); - assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ; + assert(LD->getExtensionType() == ISD::NON_EXTLOAD); assert(Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only"); - SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), - LD->getAlignment()); + SDValue newLD = + DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); // The legalizer (the caller) is expecting two values from the legalized
// load, so we build a MergeValues node for it. See ExpandUnalignedLoad() // in LegalizeDAG.cpp which also uses MergeValues. - SDValue Ops[] = {result, LD->getChain()}; + SDValue Ops[] = { result, LD->getChain() }; return DAG.getMergeValues(Ops, 2, dl); } +SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + EVT ValVT = Op.getOperand(1).getValueType(); + if (ValVT == MVT::i1) + return LowerSTOREi1(Op, DAG); + else if (ValVT.isVector()) + return LowerSTOREVector(Op, DAG); + else + return SDValue(); +} + +SDValue +NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { + SDNode *N = Op.getNode(); + SDValue Val = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + EVT ValVT = Val.getValueType(); + + if (ValVT.isVector()) { + // We only handle "native" vector sizes for now, e.g. <4 x double> is not + // legal. We can (and should) split that into 2 stores of <2 x double> here + // but I'm leaving that as a TODO for now. + if (!ValVT.isSimple()) + return SDValue(); + switch (ValVT.getSimpleVT().SimpleTy) { + default: + return SDValue(); + case MVT::v2i8: + case MVT::v2i16: + case MVT::v2i32: + case MVT::v2i64: + case MVT::v2f32: + case MVT::v2f64: + case MVT::v4i8: + case MVT::v4i16: + case MVT::v4i32: + case MVT::v4f32: + // This is a "native" vector type + break; + } + + unsigned Opcode = 0; + EVT EltVT = ValVT.getVectorElementType(); + unsigned NumElts = ValVT.getVectorNumElements(); + + // Since StoreV2 is a target node, we cannot rely on DAG type legalization. + // Therefore, we must ensure the type is legal. For i1 and i8, we set the + // stored type to i16 and propagate the "real" type as the memory type. + bool NeedExt = false; + if (EltVT.getSizeInBits() < 16) + NeedExt = true; + + switch (NumElts) { + default: + return SDValue(); + case 2: + Opcode = NVPTXISD::StoreV2; + break; + case 4: { + Opcode = NVPTXISD::StoreV4; + break; + } + } + + SmallVector<SDValue, 8> Ops; + + // First is the chain + Ops.push_back(N->getOperand(0)); + + // Then the split values + for (unsigned i = 0; i < NumElts; ++i) { + SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, + DAG.getIntPtrConstant(i)); + if (NeedExt) + // ANY_EXTEND is correct here since the store will only look at the + // lower-order bits anyway.
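+ // (A sketch of the effect: a <4 x i8> store becomes four
+ // EXTRACT_VECTOR_ELTs, each ANY_EXTENDed to i16, feeding an
+ // NVPTXISD::StoreV4 whose memory VT remains v4i8.)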
+ ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); + Ops.push_back(ExtVal); + } + + // Then any remaining arguments + for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { + Ops.push_back(N->getOperand(i)); + } + + MemSDNode *MemSD = cast<MemSDNode>(N); + + SDValue NewSt = DAG.getMemIntrinsicNode( + Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(), + MemSD->getMemoryVT(), MemSD->getMemOperand()); + + //return DCI.CombineTo(N, NewSt, true); + return NewSt; + } + + return SDValue(); +} + // st i1 v, addr // => // v1 = zxt v to i8 // st i8, addr -SDValue NVPTXTargetLowering:: -LowerSTORE(SDValue Op, SelectionDAG &DAG) const { +SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); StoreSDNode *ST = cast<StoreSDNode>(Node); @@ -906,18 +994,14 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, - MVT::i8, Tmp3); - SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3); + SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); return Result; } - -SDValue -NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx, - EVT v) const { +SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, + int idx, EVT v) const { std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); std::stringstream suffix; suffix << idx; @@ -930,19 +1014,16 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { return getExtSymb(DAG, ".PARAM", idx, v); } -SDValue -NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { +SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { return getExtSymb(DAG, ".HLPPARAM", idx); } // Check to see if the kernel argument is image*_t or sampler_t bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { - static const char *const specialTypes[] = { - "struct._image2d_t", - "struct._image3d_t", - "struct._sampler_t" - }; + static const char *const specialTypes[] = { "struct._image2d_t", + "struct._image3d_t", + "struct._sampler_t" }; const Type *Ty = arg->getType(); const PointerType *PTy = dyn_cast<PointerType>(Ty); @@ -954,7 +1035,7 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { return false; const StructType *STy = dyn_cast<StructType>(PTy->getElementType()); - const std::string TypeName = STy ? STy->getName() : ""; + const std::string TypeName = STy && !STy->isLiteral() ? 
STy->getName() : ""; for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i) if (TypeName == specialTypes[i]) @@ -963,17 +1044,15 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { return false; } -SDValue -NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { +SDValue NVPTXTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); const DataLayout *TD = getDataLayout(); const Function *F = MF.getFunction(); - const AttrListPtr &PAL = F->getAttributes(); + const AttributeSet &PAL = F->getAttributes(); SDValue Root = DAG.getRoot(); std::vector<SDValue> OutChains; @@ -984,34 +1063,43 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, std::vector<Type *> argTypes; std::vector<const Argument *> theArgs; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) { + I != E; ++I) { theArgs.push_back(I); argTypes.push_back(I->getType()); } - assert(argTypes.size() == Ins.size() && - "Ins types and function types did not match"); + //assert(argTypes.size() == Ins.size() && + // "Ins types and function types did not match"); int idx = 0; - for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) { + for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) { Type *Ty = argTypes[i]; EVT ObjectVT = getValueType(Ty); - assert(ObjectVT == Ins[i].VT && - "Ins type did not match function type"); + //assert(ObjectVT == Ins[i].VT && + // "Ins type did not match function type"); // If the kernel argument is image*_t or sampler_t, convert it to // an i32 constant holding the parameter position. This can later be // matched in the AsmPrinter to output the correct mangled name. - if (isImageOrSamplerVal(theArgs[i], - (theArgs[i]->getParent() ? - theArgs[i]->getParent()->getParent() : 0))) { + if (isImageOrSamplerVal( + theArgs[i], + (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() + : 0))) { assert(isKernel && "Only kernels can have image/sampler params"); - InVals.push_back(DAG.getConstant(i+1, MVT::i32)); + InVals.push_back(DAG.getConstant(i + 1, MVT::i32)); continue; } if (theArgs[i]->use_empty()) { // argument is dead - InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); + if (ObjectVT.isVector()) { + EVT EltVT = ObjectVT.getVectorElementType(); + unsigned NumElts = ObjectVT.getVectorNumElements(); + for (unsigned vi = 0; vi < NumElts; ++vi) { + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT)); + } + } else { + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); + } continue; } @@ -1019,29 +1107,52 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, // to newly created nodes. The SDNodes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order.
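// (The attribute index below is likewise 1-based: slot 0 describes the
// return value, so parameter i is queried at index i + 1.)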
- if (PAL.getParamAttributes(i+1).hasAttribute(Attributes::ByVal) == false) { + if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { + if (ObjectVT.isVector()) { + unsigned NumElts = ObjectVT.getVectorNumElements(); + EVT EltVT = ObjectVT.getVectorElementType(); + unsigned Offset = 0; + for (unsigned vi = 0; vi < NumElts; ++vi) { + SDValue A = getParamSymbol(DAG, idx, getPointerTy()); + SDValue B = DAG.getIntPtrConstant(Offset); + SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), + //getParamSymbol(DAG, idx, EltVT), + //DAG.getConstant(Offset, getPointerTy())); + A, B); + Value *SrcValue = Constant::getNullValue(PointerType::get( + EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); + SDValue Ld = DAG.getLoad( + EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false, + false, + TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); + Offset += EltVT.getStoreSizeInBits() / 8; + InVals.push_back(Ld); + } + continue; + } + // A plain scalar. if (isABI || isKernel) { // If ABI, load from the param symbol SDValue Arg = getParamSymbol(DAG, idx); - Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT( - F->getContext()), - llvm::ADDRESS_SPACE_PARAM)); - SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg, - MachinePointerInfo(srcValue), false, false, - false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT( - F->getContext()))); + // Conjure up a value that we can get the address space from. + // FIXME: Using a constant here is a hack. + Value *srcValue = Constant::getNullValue( + PointerType::get(ObjectVT.getTypeForEVT(F->getContext()), + llvm::ADDRESS_SPACE_PARAM)); + SDValue p = DAG.getLoad( + ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, + false, + TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); if (p.getNode()) - DAG.AssignOrdering(p.getNode(), idx+1); + DAG.AssignOrdering(p.getNode(), idx + 1); InVals.push_back(p); - } - else { + } else { // If no ABI, just move the param symbol SDValue Arg = getParamSymbol(DAG, idx, ObjectVT); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) - DAG.AssignOrdering(p.getNode(), idx+1); + DAG.AssignOrdering(p.getNode(), idx + 1); InVals.push_back(p); } continue; @@ -1058,47 +1169,49 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) - DAG.AssignOrdering(p.getNode(), idx+1); + DAG.AssignOrdering(p.getNode(), idx + 1); if (isKernel) InVals.push_back(p); else { - SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, - DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), - p); + SDValue p2 = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, + DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); InVals.push_back(p2); } } else { // Have to move a set of param symbols to registers and // store them locally and return the local pointer in InVals const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]); - assert(elemPtrType && - "Byval parameter should be a pointer type"); + assert(elemPtrType && "Byval parameter should be a pointer type"); Type *elemType = elemPtrType->getElementType(); // Compute the constituent parts SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> offsets; ComputeValueVTs(*this, elemType, vtparts, &offsets, 0); unsigned totalsize = 0; - for (unsigned j=0, je=vtparts.size(); j!=je; ++j) + for (unsigned j = 0, je = 
vtparts.size(); j != je; ++j) totalsize += vtparts[j].getStoreSizeInBits(); - SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()-> - CreateStackObject(totalsize/8, 16, false), - getPointerTy()); + SDValue localcopy = DAG.getFrameIndex( + MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false), + getPointerTy()); unsigned sizesofar = 0; std::vector<SDValue> theChains; - for (unsigned j=0, je=vtparts.size(); j!=je; ++j) { + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned numElems = 1; - if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements(); - for (unsigned k=0, ke=numElems; k!=ke; ++k) { + if (vtparts[j].isVector()) + numElems = vtparts[j].getVectorNumElements(); + for (unsigned k = 0, ke = numElems; k != ke; ++k) { EVT tmpvt = vtparts[j]; - if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType(); + if (tmpvt.isVector()) + tmpvt = tmpvt.getVectorElementType(); SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt, getParamSymbol(DAG, idx, tmpvt)); - SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, - DAG.getConstant(sizesofar, getPointerTy())); - theChains.push_back(DAG.getStore(Chain, dl, arg, addr, - MachinePointerInfo(), false, false, 0)); - sizesofar += tmpvt.getStoreSizeInBits()/8; + SDValue addr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, + DAG.getConstant(sizesofar, getPointerTy())); + theChains.push_back(DAG.getStore( + Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0)); + sizesofar += tmpvt.getStoreSizeInBits() / 8; ++idx; } } @@ -1118,43 +1231,42 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, //} if (!OutChains.empty()) - DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], OutChains.size())); + DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0], + OutChains.size())); return Chain; } -SDValue -NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { +SDValue NVPTXTargetLowering::LowerReturn( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, + SelectionDAG &DAG) const { bool isABI = (nvptxSubtarget.getSmVersion() >= 20); unsigned sizesofar = 0; unsigned idx = 0; - for (unsigned i=0, e=Outs.size(); i!=e; ++i) { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { SDValue theVal = OutVals[i]; EVT theValType = theVal.getValueType(); unsigned numElems = 1; - if (theValType.isVector()) numElems = theValType.getVectorNumElements(); - for (unsigned j=0,je=numElems; j!=je; ++j) { + if (theValType.isVector()) + numElems = theValType.getVectorNumElements(); + for (unsigned j = 0, je = numElems; j != je; ++j) { SDValue tmpval = theVal; if (theValType.isVector()) tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - theValType.getVectorElementType(), - tmpval, DAG.getIntPtrConstant(j)); - Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval, - dl, MVT::Other, - Chain, - DAG.getConstant(isABI ? sizesofar : idx, MVT::i32), + theValType.getVectorElementType(), tmpval, + DAG.getIntPtrConstant(j)); + Chain = DAG.getNode( + isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl, + MVT::Other, Chain, DAG.getConstant(isABI ? 
sizesofar : idx, MVT::i32), tmpval); if (theValType.isVector()) - sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8; + sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8; else - sizesofar += theValType.getStoreSizeInBits()/8; + sizesofar += theValType.getStoreSizeInBits() / 8; ++idx; } } @@ -1162,12 +1274,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); } -void -NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector<SDValue> &Ops, - SelectionDAG &DAG) const -{ +void NVPTXTargetLowering::LowerAsmOperandForConstraint( + SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { if (Constraint.length() > 1) return; else @@ -1177,8 +1286,7 @@ NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // NVPTX supports vectors of legal types of any length in Intrinsics because the // NVPTX-specific type legalizer // will legalize them to the PTX supported length. -bool -NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { +bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { if (isTypeLegal(VT)) return true; if (VT.isVector()) { @@ -1189,15 +1297,13 @@ NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { return false; } - // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as // TgtMemIntrinsic // because we need the information that is only available in the "Value" type // of destination // pointer. In particular, the address space information. -bool -NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, - unsigned Intrinsic) const { +bool NVPTXTargetLowering::getTgtMemIntrinsic( + IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { switch (Intrinsic) { default: return false; @@ -1253,9 +1359,8 @@ NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, /// Used to guide target specific optimizations, like loop strength reduction /// (LoopStrengthReduce.cpp) and memory optimization for address mode /// (CodeGenPrepare.cpp) -bool -NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { +bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { // AddrMode - This represents an addressing mode of: // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg @@ -1273,10 +1378,10 @@ NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, } switch (AM.Scale) { - case 0: // "r", "r+i" or "i" is allowed + case 0: // "r", "r+i" or "i" is allowed break; case 1: - if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. + if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. return false; // Otherwise we have r+i. break; @@ -1313,8 +1418,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } - -std::pair<unsigned, const TargetRegisterClass*> +std::pair<unsigned, const TargetRegisterClass *> NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { @@ -1337,9 +1441,253 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } - - /// getFunctionAlignment - Return the Log2 alignment of this function.
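/// The value is a log2, so the constant 4 below requests 2^4 = 16-byte
/// alignment.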
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { return 4; } + +/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads. +static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &Results) { + EVT ResVT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + assert(ResVT.isVector() && "Vector load must have vector type"); + + // We only handle "native" vector sizes for now, e.g. <4 x double> is not + // legal. We can (and should) split that into 2 loads of <2 x double> here + // but I'm leaving that as a TODO for now. + assert(ResVT.isSimple() && "Can only handle simple types"); + switch (ResVT.getSimpleVT().SimpleTy) { + default: + return; + case MVT::v2i8: + case MVT::v2i16: + case MVT::v2i32: + case MVT::v2i64: + case MVT::v2f32: + case MVT::v2f64: + case MVT::v4i8: + case MVT::v4i16: + case MVT::v4i32: + case MVT::v4f32: + // This is a "native" vector type + break; + } + + EVT EltVT = ResVT.getVectorElementType(); + unsigned NumElts = ResVT.getVectorNumElements(); + + // Since LoadV2 is a target node, we cannot rely on DAG type legalization. + // Therefore, we must ensure the type is legal. For i1 and i8, we set the + // loaded type to i16 and propagate the "real" type as the memory type. + bool NeedTrunc = false; + if (EltVT.getSizeInBits() < 16) { + EltVT = MVT::i16; + NeedTrunc = true; + } + + unsigned Opcode = 0; + SDVTList LdResVTs; + + switch (NumElts) { + default: + return; + case 2: + Opcode = NVPTXISD::LoadV2; + LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); + break; + case 4: { + Opcode = NVPTXISD::LoadV4; + EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; + LdResVTs = DAG.getVTList(ListVTs, 5); + break; + } + } + + SmallVector<SDValue, 8> OtherOps; + + // Copy regular operands + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + OtherOps.push_back(N->getOperand(i)); + + LoadSDNode *LD = cast<LoadSDNode>(N); + + // The select routine does not have access to the LoadSDNode instance, so + // pass along the extension information + OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); + + SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], + OtherOps.size(), LD->getMemoryVT(), + LD->getMemOperand()); + + SmallVector<SDValue, 4> ScalarRes; + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Res = NewLD.getValue(i); + if (NeedTrunc) + Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); + ScalarRes.push_back(Res); + } + + SDValue LoadChain = NewLD.getValue(NumElts); + + SDValue BuildVec = + DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + + Results.push_back(BuildVec); + Results.push_back(LoadChain); +} + +static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &Results) { + SDValue Chain = N->getOperand(0); + SDValue Intrin = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + // Get the intrinsic ID + unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); + switch (IntrinNo) { + default: + return; + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: { + EVT ResVT = N->getValueType(0); + + if (ResVT.isVector()) { + // Vector LDG/LDU + + unsigned NumElts = ResVT.getVectorNumElements(); + EVT EltVT = ResVT.getVectorElementType(); + + // Since LDU/LDG are target nodes, we
cannot rely on DAG type legalization. + // Therefore, we must ensure the type is legal. For i1 and i8, we set the + // loaded type to i16 and propagate the "real" type as the memory type. + bool NeedTrunc = false; + if (EltVT.getSizeInBits() < 16) { + EltVT = MVT::i16; + NeedTrunc = true; + } + + unsigned Opcode = 0; + SDVTList LdResVTs; + + switch (NumElts) { + default: + return; + case 2: + switch (IntrinNo) { + default: + return; + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: + Opcode = NVPTXISD::LDGV2; + break; + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: + Opcode = NVPTXISD::LDUV2; + break; + } + LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); + break; + case 4: { + switch (IntrinNo) { + default: + return; + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: + Opcode = NVPTXISD::LDGV4; + break; + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: + Opcode = NVPTXISD::LDUV4; + break; + } + EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; + LdResVTs = DAG.getVTList(ListVTs, 5); + break; + } + } + + SmallVector<SDValue, 8> OtherOps; + + // Copy regular operands + + OtherOps.push_back(Chain); // Chain + // Skip operand 1 (intrinsic ID) + // Others + for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) + OtherOps.push_back(N->getOperand(i)); + + MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); + + SDValue NewLD = DAG.getMemIntrinsicNode( + Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(), + MemSD->getMemoryVT(), MemSD->getMemOperand()); + + SmallVector<SDValue, 4> ScalarRes; + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Res = NewLD.getValue(i); + if (NeedTrunc) + Res = + DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); + ScalarRes.push_back(Res); + } + + SDValue LoadChain = NewLD.getValue(NumElts); + + SDValue BuildVec = + DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + + Results.push_back(BuildVec); + Results.push_back(LoadChain); + } else { + // i8 LDG/LDU + assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && + "Custom handling of non-i8 ldu/ldg?"); + + // Just copy all operands as-is + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + Ops.push_back(N->getOperand(i)); + + // Force output to i16 + SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); + + MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); + + // We make sure the memory type is i8, which will be used during isel + // to select the proper instruction.
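+ // (Sketch: an i8 ldg/ldu thus produces an i16 result whose low 8 bits
+ // hold the loaded value; the i8 memory VT is what lets isel pick the
+ // u8 flavor of the instruction.)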
+ SDValue NewLD = + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0], + Ops.size(), MVT::i8, MemSD->getMemOperand()); + + Results.push_back(NewLD.getValue(0)); + Results.push_back(NewLD.getValue(1)); + } + } + } +} + +void NVPTXTargetLowering::ReplaceNodeResults( + SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { + switch (N->getOpcode()) { + default: + report_fatal_error("Unhandled custom legalization"); + case ISD::LOAD: + ReplaceLoadVector(N, DAG, Results); + return; + case ISD::INTRINSIC_W_CHAIN: + ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); + return; + } +} diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 94a177ceb00a..3cd49d38af76 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -58,7 +58,16 @@ enum NodeType { RETURN, CallSeqBegin, CallSeqEnd, - Dummy + Dummy, + + LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, + LoadV4, + LDGV2, // LDG.v2 + LDGV4, // LDG.v4 + LDUV2, // LDU.v2 + LDUV4, // LDU.v4 + StoreV2, + StoreV4 }; } @@ -78,7 +87,7 @@ public: bool isTypeSupportedInIntrinsic(MVT VT) const; - bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const; /// isLegalAddressingMode - Return true if the addressing mode represented @@ -92,18 +101,19 @@ public: virtual unsigned getFunctionAlignment(const Function *F) const; virtual EVT getSetCCResultType(EVT VT) const { + if (VT.isVector()) + return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); return MVT::i1; } ConstraintType getConstraintType(const std::string &Constraint) const; - std::pair<unsigned, const TargetRegisterClass*> + std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - virtual SDValue - LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, - SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + virtual SDValue LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; @@ -125,22 +135,29 @@ public: NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts - virtual MVT getShiftAmountTy(EVT LHSTy) const { - return MVT::i32; - } + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + + virtual bool shouldSplitVectorElementType(EVT VT) const; private: - const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here + const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here - SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT = - MVT::i32) const; + SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, + EVT = MVT::i32) const; SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const; SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; + SDValue 
LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; + + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const; }; } // namespace llvm diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index cd50deb26a23..33a63c26f4e2 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -16,108 +16,62 @@ #include "NVPTXTargetMachine.h" #define GET_INSTRINFO_CTOR #include "NVPTXGenInstrInfo.inc" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include <cstdio> - using namespace llvm; // FIXME: Add the subtarget support on this constructor. NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm) -: NVPTXGenInstrInfo(), - TM(tm), - RegInfo(*this, *TM.getSubtargetImpl()) {} - + : NVPTXGenInstrInfo(), TM(tm), RegInfo(*this, *TM.getSubtargetImpl()) {} -void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { +void NVPTXInstrInfo::copyPhysReg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, bool KillSrc) const { if (NVPTX::Int32RegsRegClass.contains(DestReg) && NVPTX::Int32RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int8RegsRegClass.contains(DestReg) && - NVPTX::Int8RegsRegClass.contains(SrcReg)) + NVPTX::Int8RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int1RegsRegClass.contains(DestReg) && - NVPTX::Int1RegsRegClass.contains(SrcReg)) + NVPTX::Int1RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Float32RegsRegClass.contains(DestReg) && - NVPTX::Float32RegsRegClass.contains(SrcReg)) + NVPTX::Float32RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int16RegsRegClass.contains(DestReg) && - NVPTX::Int16RegsRegClass.contains(SrcReg)) + NVPTX::Int16RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int64RegsRegClass.contains(DestReg) && - NVPTX::Int64RegsRegClass.contains(SrcReg)) + NVPTX::Int64RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Float64RegsRegClass.contains(DestReg) && - NVPTX::Float64RegsRegClass.contains(SrcReg)) + NVPTX::Float64RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V4F32RegsRegClass.contains(DestReg) && - NVPTX::V4F32RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V4I32RegsRegClass.contains(DestReg) && - 
NVPTX::V4I32RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2F32RegsRegClass.contains(DestReg) && - NVPTX::V2F32RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2I32RegsRegClass.contains(DestReg) && - NVPTX::V2I32RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V4I8RegsRegClass.contains(DestReg) && - NVPTX::V4I8RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2I8RegsRegClass.contains(DestReg) && - NVPTX::V2I8RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V4I16RegsRegClass.contains(DestReg) && - NVPTX::V4I16RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2I16RegsRegClass.contains(DestReg) && - NVPTX::V2I16RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2I64RegsRegClass.contains(DestReg) && - NVPTX::V2I64RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (NVPTX::V2F64RegsRegClass.contains(DestReg) && - NVPTX::V2F64RegsRegClass.contains(SrcReg)) - BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else { llvm_unreachable("Don't know how to copy a register"); } } -bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, +bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DestReg) const { // Look for the appropriate part of TSFlags bool isMove = false; - unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> - NVPTX::SimpleMoveShift; + unsigned TSFlags = + (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> NVPTX::SimpleMoveShift; isMove = (TSFlags == 1); if (isMove) { @@ -134,10 +88,10 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, return false; } -bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const -{ +bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const { switch (MI.getOpcode()) { - default: return false; + default: + return false; case NVPTX::INT_PTX_SREG_NTID_X: case NVPTX::INT_PTX_SREG_NTID_Y: case NVPTX::INT_PTX_SREG_NTID_Z: @@ -155,12 +109,11 @@ bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const } } - bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const { bool isLoad = false; - unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> - NVPTX::isLoadShift; + unsigned TSFlags = + (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> NVPTX::isLoadShift; isLoad = (TSFlags == 1); if (isLoad) AddrSpace = getLdStCodeAddrSpace(MI); @@ -170,15 +123,14 @@ bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const { bool isStore = false; - unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> - NVPTX::isStoreShift; + unsigned TSFlags = + (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> NVPTX::isStoreShift; isStore 
= (TSFlags == 1); if (isStore) AddrSpace = getLdStCodeAddrSpace(MI); return isStore; } - bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { unsigned addrspace = 0; if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS) @@ -192,7 +144,6 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { return true; } - /// AnalyzeBranch - Analyze the branching code at the end of MBB, returning /// true if it cannot be understood (e.g. it's a switch dispatch or isn't /// implemented for a target). Upon success, this returns false and returns @@ -216,11 +167,9 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { /// Note that RemoveBranch and InsertBranch must be implemented to support /// cases where this method returns success. /// -bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { +bool NVPTXInstrInfo::AnalyzeBranch( + MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) @@ -248,14 +197,13 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineInstr *SecondLastInst = I; // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && - isUnpredicatedTerminator(--I)) + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it. if (SecondLastInst->getOpcode() == NVPTX::CBranch && LastInst->getOpcode() == NVPTX::GOTO) { - TBB = SecondLastInst->getOperand(1).getMBB(); + TBB = SecondLastInst->getOperand(1).getMBB(); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); return false; @@ -278,7 +226,8 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; + if (I == MBB.begin()) + return 0; --I; if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch) return 0; @@ -288,7 +237,8 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { I = MBB.end(); - if (I == MBB.begin()) return 1; + if (I == MBB.begin()) + return 1; --I; if (I->getOpcode() != NVPTX::CBranch) return 1; @@ -298,11 +248,9 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -unsigned -NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const { +unsigned NVPTXInstrInfo::InsertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && @@ -310,17 +258,16 @@ NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // One-way branch. 
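// (Roughly, in the emitted PTX a GOTO prints as an unconditional
// "bra.uni LBB...;" and a CBranch as a predicated "@%p bra LBB...;".)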
if (FBB == 0) { - if (Cond.empty()) // Unconditional branch + if (Cond.empty()) // Unconditional branch BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB); - else // Conditional branch - BuildMI(&MBB, DL, get(NVPTX::CBranch)) - .addReg(Cond[0].getReg()).addMBB(TBB); + else // Conditional branch + BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()) + .addMBB(TBB); return 1; } // Two-way Conditional Branch. - BuildMI(&MBB, DL, get(NVPTX::CBranch)) - .addReg(Cond[0].getReg()).addMBB(TBB); + BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB); BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB); return 2; } diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h index 7b8e218b05b6..b1972e9b7254 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -23,8 +23,7 @@ namespace llvm { -class NVPTXInstrInfo : public NVPTXGenInstrInfo -{ +class NVPTXInstrInfo : public NVPTXGenInstrInfo { NVPTXTargetMachine &TM; const NVPTXRegisterInfo RegInfo; public: @@ -50,30 +49,26 @@ public: * const TargetRegisterClass *RC) const; */ - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const ; - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, + virtual void copyPhysReg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, bool KillSrc) const; + virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DestReg) const; bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const; bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const; bool isReadSpecialReg(MachineInstr &MI) const; - virtual bool CanTailMerge(const MachineInstr *MI) const ; + virtual bool CanTailMerge(const MachineInstr *MI) const; // Branch analysis. 
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const; + virtual bool AnalyzeBranch( + MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const; + virtual unsigned InsertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const { - return MI.getOperand(2).getImm(); + return MI.getOperand(2).getImm(); } }; diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 8a410b872925..f43abe283b58 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -52,6 +52,7 @@ def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">; def hasVote : Predicate<"Subtarget.hasVote()">; def hasDouble : Predicate<"Subtarget.hasDouble()">; def reqPTX20 : Predicate<"Subtarget.reqPTX20()">; +def hasLDG : Predicate<"Subtarget.hasLDG()">; def hasLDU : Predicate<"Subtarget.hasLDU()">; def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; @@ -2153,11 +2154,21 @@ multiclass LD<NVPTXRegClass regclass> { i32imm:$fromWidth, Int32Regs:$addr), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t$dst, [$addr];"), []>; + def _areg_64 : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", + " \t$dst, [$addr];"), []>; def _ari : NVPTXInst<(outs regclass:$dst), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t$dst, [$addr+$offset];"), []>; + def _ari_64 : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", + " \t$dst, [$addr+$offset];"), []>; def _asi : NVPTXInst<(outs regclass:$dst), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset), @@ -2174,19 +2185,6 @@ defm LD_f32 : LD<Float32Regs>; defm LD_f64 : LD<Float64Regs>; } -let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in { -defm LD_v2i8 : LD<V2I8Regs>; -defm LD_v4i8 : LD<V4I8Regs>; -defm LD_v2i16 : LD<V2I16Regs>; -defm LD_v4i16 : LD<V4I16Regs>; -defm LD_v2i32 : LD<V2I32Regs>; -defm LD_v4i32 : LD<V4I32Regs>; -defm LD_v2f32 : LD<V2F32Regs>; -defm LD_v4f32 : LD<V4F32Regs>; -defm LD_v2i64 : LD<V2I64Regs>; -defm LD_v2f64 : LD<V2F64Regs>; -} - multiclass ST<NVPTXRegClass regclass> { def _avar : NVPTXInst<(outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, @@ -2198,11 +2196,21 @@ multiclass ST<NVPTXRegClass regclass> { LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", " \t[$addr], $src;"), []>; + def _areg_64 
: NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", + "\t[$addr], $src;"), []>; def _ari : NVPTXInst<(outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", " \t[$addr+$offset], $src;"), []>; + def _ari_64 : NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", + "\t[$addr+$offset], $src;"), []>; def _asi : NVPTXInst<(outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset), @@ -2219,19 +2227,6 @@ defm ST_f32 : ST<Float32Regs>; defm ST_f64 : ST<Float64Regs>; } -let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in { -defm ST_v2i8 : ST<V2I8Regs>; -defm ST_v4i8 : ST<V4I8Regs>; -defm ST_v2i16 : ST<V2I16Regs>; -defm ST_v4i16 : ST<V4I16Regs>; -defm ST_v2i32 : ST<V2I32Regs>; -defm ST_v4i32 : ST<V4I32Regs>; -defm ST_v2f32 : ST<V2F32Regs>; -defm ST_v4f32 : ST<V4F32Regs>; -defm ST_v2i64 : ST<V2I64Regs>; -defm ST_v2f64 : ST<V2F64Regs>; -} - // The following is used only in and after vector elementizations. // Vector elementization happens at the machine instruction level, so the // following instruction @@ -2247,11 +2242,21 @@ multiclass LD_VEC<NVPTXRegClass regclass> { i32imm:$fromWidth, Int32Regs:$addr), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; + def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; + def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset), @@ -2269,6 +2274,12 @@ multiclass LD_VEC<NVPTXRegClass regclass> { i32imm:$fromWidth, Int32Regs:$addr), !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; + def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, 
[$addr];"), []>; def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, @@ -2276,6 +2287,13 @@ multiclass LD_VEC<NVPTXRegClass regclass> { !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), []>; + def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), + []>; def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, @@ -2304,12 +2322,23 @@ multiclass ST_VEC<NVPTXRegClass regclass> { LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; + def _v2_areg_64 : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; def _v2_ari : NVPTXInst<(outs), (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; + def _v2_ari_64 : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, + i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; def _v2_asi : NVPTXInst<(outs), (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, @@ -2328,6 +2357,12 @@ multiclass ST_VEC<NVPTXRegClass regclass> { i32imm:$fromWidth, Int32Regs:$addr), !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; + def _v4_areg_64 : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; def _v4_ari : NVPTXInst<(outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, @@ -2335,6 +2370,13 @@ multiclass ST_VEC<NVPTXRegClass regclass> { !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), []>; + def _v4_ari_64 : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + 
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), + []>; def _v4_asi : NVPTXInst<(outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, @@ -2822,8 +2864,6 @@ def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>; -include "NVPTXVector.td" - include "NVPTXIntrinsics.td" diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 028a94bfd1bb..49e2568dfa2c 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1343,52 +1343,113 @@ defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; -// Vector ldu -multiclass VLDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp, - NVPTXInst eleInst, NVPTXInst eleInst64> { - def _32: NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src), - !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>, - Requires<[hasLDU]>; - def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src), - !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>, - Requires<[hasLDU]>; + +//----------------------------------- +// Support for ldg on sm_35 or later +//----------------------------------- + +def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{ + MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N); + return M->getMemoryVT() == MVT::i8; +}]>; + +multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> { + def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>; + def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>; + def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>, + Requires<[hasLDG]>; + def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>; + def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>; +} + +multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> { + def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>; + def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>; + def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>, + Requires<[hasLDG]>; + def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>; + def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), + !strconcat("ld.global.nc.", TyStr), + [(set regclass:$result, (IntOp 
ADDRri64:$src))]>, Requires<[hasLDG]>; +} + +defm INT_PTX_LDG_GLOBAL_i8 + : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>; +defm INT_PTX_LDG_GLOBAL_i16 + : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>; +defm INT_PTX_LDG_GLOBAL_i32 + : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>; +defm INT_PTX_LDG_GLOBAL_i64 + : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>; +defm INT_PTX_LDG_GLOBAL_f32 + : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>; +defm INT_PTX_LDG_GLOBAL_f64 + : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>; +defm INT_PTX_LDG_GLOBAL_p32 + : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>; +defm INT_PTX_LDG_GLOBAL_p64 + : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>; + +// vector + +// Elementized vector ldg +multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> { + def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; } -let VecInstType=isVecLD.Value in { -defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];", - V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32, - INT_PTX_LDU_G_v2i8_ELE_64>; -defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];", - V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32, - INT_PTX_LDU_G_v4i8_ELE_64>; -defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];", - V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32, - INT_PTX_LDU_G_v2i16_ELE_64>; -defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];", - V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32, - INT_PTX_LDU_G_v4i16_ELE_64>; -defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];", - V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32, - INT_PTX_LDU_G_v2i32_ELE_64>; -defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];", - V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32, - INT_PTX_LDU_G_v4i32_ELE_64>; -defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];", - V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32, - INT_PTX_LDU_G_v2f32_ELE_64>; -defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];", - V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32, - INT_PTX_LDU_G_v4f32_ELE_64>; -defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];", - V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32, - INT_PTX_LDU_G_v2i64_ELE_64>; -defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];", - V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32, - INT_PTX_LDU_G_v2f64_ELE_64>; +multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { + def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), (ins Int32Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; + def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), (ins Int64Regs:$src), + !strconcat("ld.global.nc.", TyStr), []>; } +// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads. 
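The ldg_i8 PatFrag above is needed because llvm.nvvm.ldg.global.i is overloaded over its result type while PTX has no 8-bit registers: the u8 load is selected into Int16Regs, so the selector has to key on the memory type of the intrinsic node rather than the register type of its result. A minimal C++ sketch of what that TableGen predicate checks (mirroring the PatFrag code above):

bool isI8Ldg(SDNode *N) {
  // Match only the i8 flavour of the overloaded ldg intrinsic; the result
  // register is wider (i16), so the result VT cannot be used for this test.
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
  return M->getMemoryVT() == MVT::i8;
}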
+defm INT_PTX_LDG_G_v2i8_ELE + : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; +defm INT_PTX_LDG_G_v2i16_ELE + : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; +defm INT_PTX_LDG_G_v2i32_ELE + : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; +defm INT_PTX_LDG_G_v2f32_ELE + : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; +defm INT_PTX_LDG_G_v2i64_ELE + : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>; +defm INT_PTX_LDG_G_v2f64_ELE + : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>; +defm INT_PTX_LDG_G_v4i8_ELE + : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; +defm INT_PTX_LDG_G_v4i16_ELE + : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; +defm INT_PTX_LDG_G_v4i32_ELE + : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; +defm INT_PTX_LDG_G_v4f32_ELE + : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; multiclass NG_TO_G<string Str, Intrinsic Intrin> { diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 9273931e9919..7c257b4c6a89 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -12,31 +12,28 @@ //===----------------------------------------------------------------------===// #include "NVPTXLowerAggrCopies.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/InstIterator.h" -#include "llvm/DataLayout.h" using namespace llvm; -namespace llvm { -FunctionPass *createLowerAggrCopies(); -} +namespace llvm { FunctionPass *createLowerAggrCopies(); } char NVPTXLowerAggrCopies::ID = 0; // Lower MemTransferInst or load-store pair to loop -static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr, - Value *dstAddr, Value *len, - //unsigned numLoads, - bool srcVolatile, bool dstVolatile, - LLVMContext &Context, Function &F) { +static void convertTransferToLoop( + Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len, + //unsigned numLoads, + bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) { Type *indType = len->getType(); BasicBlock *origBB = splitAt->getParent(); @@ -48,10 +45,8 @@ static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr, // srcAddr and dstAddr are expected to be pointer types, // so no check is made here. 
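For context on convertTransferToLoop here: there is no libc memcpy to call on the device, so the pass expands an oversized or variable-length MemTransferInst into an explicit per-byte load/store loop (the pointers are bitcast to char* just below). A behavioral sketch in plain C++ of the loop the pass builds with IRBuilder, not the pass's actual code; convertMemSetToLoop is the analogous store-only version:

#include <cstddef>

// Byte-wise copy equivalent to the emitted IR loop; the volatile flags of
// the original transfer carry over to the generated loads and stores.
void loweredTransfer(volatile char *dst, const volatile char *src,
                     std::size_t len) {
  for (std::size_t i = 0; i != len; ++i)
    dst[i] = src[i];
}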
- unsigned srcAS = - dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); - unsigned dstAS = - dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); + unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointers to (char *) srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); @@ -86,12 +81,11 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, origBB->getTerminator()->setSuccessor(0, loopBB); IRBuilder<> builder(origBB, origBB->getTerminator()); - unsigned dstAS = - dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointer to the type of value getting stored - dstAddr = builder.CreateBitCast(dstAddr, - PointerType::get(val->getType(), dstAS)); + dstAddr = + builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS)); IRBuilder<> loop(loopBB); PHINode *ind = loop.CreatePHI(len->getType(), 0); @@ -120,24 +114,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { //BasicBlock *bb = BI; for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; - ++II) { - if (LoadInst * load = dyn_cast<LoadInst>(II)) { + ++II) { + if (LoadInst *load = dyn_cast<LoadInst>(II)) { - if (load->hasOneUse() == false) continue; + if (load->hasOneUse() == false) + continue; - if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue; + if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) + continue; User *use = *(load->use_begin()); - if (StoreInst * store = dyn_cast<StoreInst>(use)) { + if (StoreInst *store = dyn_cast<StoreInst>(use)) { if (store->getOperand(0) != load) //getValueOperand - continue; + continue; aggrLoads.push_back(load); } - } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) { + } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) { Value *len = intr->getLength(); // If the number of elements being copied is greater // than MaxAggrCopySize, lower it to a loop - if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) { + if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) { if (len_int->getZExtValue() >= MaxAggrCopySize) { aggrMemcpys.push_back(intr); } @@ -145,9 +141,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // turn variable length memcpy/memmove into loop aggrMemcpys.push_back(intr); } - } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) { + } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) { Value *len = memsetintr->getLength(); - if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) { + if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) { if (len_int->getZExtValue() >= MaxAggrCopySize) { aggrMemsets.push_back(memsetintr); } @@ -158,8 +154,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { } } } - if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) - && (aggrMemsets.size() == 0)) return false; + if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) && + (aggrMemsets.size() == 0)) + return false; // // Do the transformation of an aggr load/copy/set to a loop diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index b150c69815dd..286e753fa92b 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -15,9 +15,9 @@
#ifndef NVPTX_LOWER_AGGR_COPIES_H #define NVPTX_LOWER_AGGR_COPIES_H -#include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/lib/Target/NVPTX/NVPTXNumRegisters.h b/lib/Target/NVPTX/NVPTXNumRegisters.h index b4a4dbce98a9..a95c16b1e67e 100644 --- a/lib/Target/NVPTX/NVPTXNumRegisters.h +++ b/lib/Target/NVPTX/NVPTXNumRegisters.h @@ -11,10 +11,6 @@ #ifndef NVPTX_NUM_REGISTERS_H #define NVPTX_NUM_REGISTERS_H -namespace llvm { - -const unsigned NVPTXNumRegisters = 396; - -} +namespace llvm { const unsigned NVPTXNumRegisters = 396; } #endif diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index e3cd46f063bf..282465359b07 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -13,260 +13,88 @@ #define DEBUG_TYPE "nvptx-reg-info" -#include "NVPTX.h" #include "NVPTXRegisterInfo.h" +#include "NVPTX.h" #include "NVPTXSubtarget.h" #include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Target/TargetInstrInfo.h" - using namespace llvm; -namespace llvm -{ -std::string getNVPTXRegClassName (TargetRegisterClass const *RC) { +namespace llvm { +std::string getNVPTXRegClassName(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) { return ".f32"; } if (RC == &NVPTX::Float64RegsRegClass) { return ".f64"; - } - else if (RC == &NVPTX::Int64RegsRegClass) { + } else if (RC == &NVPTX::Int64RegsRegClass) { return ".s64"; - } - else if (RC == &NVPTX::Int32RegsRegClass) { + } else if (RC == &NVPTX::Int32RegsRegClass) { return ".s32"; - } - else if (RC == &NVPTX::Int16RegsRegClass) { + } else if (RC == &NVPTX::Int16RegsRegClass) { return ".s16"; } - // Int8Regs become 16-bit registers in PTX - else if (RC == &NVPTX::Int8RegsRegClass) { + // Int8Regs become 16-bit registers in PTX + else if (RC == &NVPTX::Int8RegsRegClass) { return ".s16"; - } - else if (RC == &NVPTX::Int1RegsRegClass) { + } else if (RC == &NVPTX::Int1RegsRegClass) { return ".pred"; - } - else if (RC == &NVPTX::SpecialRegsRegClass) { + } else if (RC == &NVPTX::SpecialRegsRegClass) { return "!Special!"; - } - else if (RC == &NVPTX::V2F32RegsRegClass) { - return ".v2.f32"; - } - else if (RC == &NVPTX::V4F32RegsRegClass) { - return ".v4.f32"; - } - else if (RC == &NVPTX::V2I32RegsRegClass) { - return ".v2.s32"; - } - else if (RC == &NVPTX::V4I32RegsRegClass) { - return ".v4.s32"; - } - else if (RC == &NVPTX::V2F64RegsRegClass) { - return ".v2.f64"; - } - else if (RC == &NVPTX::V2I64RegsRegClass) { - return ".v2.s64"; - } - else if (RC == &NVPTX::V2I16RegsRegClass) { - return ".v2.s16"; - } - else if (RC == &NVPTX::V4I16RegsRegClass) { - return ".v4.s16"; - } - else if (RC == &NVPTX::V2I8RegsRegClass) { - return ".v2.s16"; - } - else if (RC == &NVPTX::V4I8RegsRegClass) { - return ".v4.s16"; - } - else { + } else { return "INTERNAL"; } return ""; } -std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) { +std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) { return "%f"; } if (RC == &NVPTX::Float64RegsRegClass) { return "%fd"; - } - else if (RC == &NVPTX::Int64RegsRegClass) { + } else if (RC == 
&NVPTX::Int64RegsRegClass) { return "%rd"; - } - else if (RC == &NVPTX::Int32RegsRegClass) { + } else if (RC == &NVPTX::Int32RegsRegClass) { return "%r"; - } - else if (RC == &NVPTX::Int16RegsRegClass) { + } else if (RC == &NVPTX::Int16RegsRegClass) { return "%rs"; - } - else if (RC == &NVPTX::Int8RegsRegClass) { + } else if (RC == &NVPTX::Int8RegsRegClass) { return "%rc"; - } - else if (RC == &NVPTX::Int1RegsRegClass) { + } else if (RC == &NVPTX::Int1RegsRegClass) { return "%p"; - } - else if (RC == &NVPTX::SpecialRegsRegClass) { + } else if (RC == &NVPTX::SpecialRegsRegClass) { return "!Special!"; - } - else if (RC == &NVPTX::V2F32RegsRegClass) { - return "%v2f"; - } - else if (RC == &NVPTX::V4F32RegsRegClass) { - return "%v4f"; - } - else if (RC == &NVPTX::V2I32RegsRegClass) { - return "%v2r"; - } - else if (RC == &NVPTX::V4I32RegsRegClass) { - return "%v4r"; - } - else if (RC == &NVPTX::V2F64RegsRegClass) { - return "%v2fd"; - } - else if (RC == &NVPTX::V2I64RegsRegClass) { - return "%v2rd"; - } - else if (RC == &NVPTX::V2I16RegsRegClass) { - return "%v2s"; - } - else if (RC == &NVPTX::V4I16RegsRegClass) { - return "%v4rs"; - } - else if (RC == &NVPTX::V2I8RegsRegClass) { - return "%v2rc"; - } - else if (RC == &NVPTX::V4I8RegsRegClass) { - return "%v4rc"; - } - else { + } else { return "INTERNAL"; } return ""; } - -bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) { - if (RC->getID() == NVPTX::V2F32RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2F64RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2I16RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2I32RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2I64RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V2I8RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V4F32RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V4I16RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V4I32RegsRegClassID) - return true; - if (RC->getID() == NVPTX::V4I8RegsRegClassID) - return true; - return false; -} - -std::string getNVPTXElemClassName(TargetRegisterClass const *RC) { - if (RC->getID() == NVPTX::V2F32RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass); - if (RC->getID() == NVPTX::V2F64RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass); - if (RC->getID() == NVPTX::V2I16RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass); - if (RC->getID() == NVPTX::V2I32RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass); - if (RC->getID() == NVPTX::V2I64RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass); - if (RC->getID() == NVPTX::V2I8RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass); - if (RC->getID() == NVPTX::V4F32RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass); - if (RC->getID() == NVPTX::V4I16RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass); - if (RC->getID() == NVPTX::V4I32RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass); - if (RC->getID() == NVPTX::V4I8RegsRegClassID) - return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass); - llvm_unreachable("Not a vector register class"); -} - -const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) { - if (RC->getID() == NVPTX::V2F32RegsRegClassID) - return (&NVPTX::Float32RegsRegClass); - if (RC->getID() == NVPTX::V2F64RegsRegClassID) - return (&NVPTX::Float64RegsRegClass); - if (RC->getID() == 
NVPTX::V2I16RegsRegClassID) - return (&NVPTX::Int16RegsRegClass); - if (RC->getID() == NVPTX::V2I32RegsRegClassID) - return (&NVPTX::Int32RegsRegClass); - if (RC->getID() == NVPTX::V2I64RegsRegClassID) - return (&NVPTX::Int64RegsRegClass); - if (RC->getID() == NVPTX::V2I8RegsRegClassID) - return (&NVPTX::Int8RegsRegClass); - if (RC->getID() == NVPTX::V4F32RegsRegClassID) - return (&NVPTX::Float32RegsRegClass); - if (RC->getID() == NVPTX::V4I16RegsRegClassID) - return (&NVPTX::Int16RegsRegClass); - if (RC->getID() == NVPTX::V4I32RegsRegClassID) - return (&NVPTX::Int32RegsRegClass); - if (RC->getID() == NVPTX::V4I8RegsRegClassID) - return (&NVPTX::Int8RegsRegClass); - llvm_unreachable("Not a vector register class"); -} - -int getNVPTXVectorSize(TargetRegisterClass const *RC) { - if (RC->getID() == NVPTX::V2F32RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2F64RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2I16RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2I32RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2I64RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V2I8RegsRegClassID) - return 2; - if (RC->getID() == NVPTX::V4F32RegsRegClassID) - return 4; - if (RC->getID() == NVPTX::V4I16RegsRegClassID) - return 4; - if (RC->getID() == NVPTX::V4I32RegsRegClassID) - return 4; - if (RC->getID() == NVPTX::V4I8RegsRegClassID) - return 4; - llvm_unreachable("Not a vector register class"); -} } NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st) - : NVPTXGenRegisterInfo(0), - Is64Bit(st.is64Bit()) {} + : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {} #define GET_REGINFO_TARGET_DESC #include "NVPTXGenRegisterInfo.inc" /// NVPTX Callee Saved Registers -const uint16_t* NVPTXRegisterInfo:: -getCalleeSavedRegs(const MachineFunction *MF) const { +const uint16_t * +NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { static const uint16_t CalleeSavedRegs[] = { 0 }; return CalleeSavedRegs; } // NVPTX Callee Saved Reg Classes -const TargetRegisterClass* const* +const TargetRegisterClass *const * NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; + static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 }; return CalleeSavedRegClasses; } @@ -275,34 +103,24 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -void NVPTXRegisterInfo:: -eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, - RegScavenger *RS) const { +void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); - unsigned i = 0; MachineInstr &MI = *II; - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); MachineFunction &MF = *MI.getParent()->getParent(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(i+1).getImm(); + MI.getOperand(FIOperandNum + 1).getImm(); // Using I0 as the frame pointer - MI.getOperand(i).ChangeToRegister(NVPTX::VRFrame, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } - -int 
NVPTXRegisterInfo:: -getDwarfRegNum(unsigned RegNum, bool isEH) const { +int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return 0; } @@ -310,16 +128,4 @@ unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return NVPTX::VRFrame; } -unsigned NVPTXRegisterInfo::getRARegister() const { - return 0; -} - -// This function eliminates ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void NVPTXRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - // Simply discard ADJCALLSTACKDOWN, - // ADJCALLSTACKUP instructions. - MBB.erase(I); -} +unsigned NVPTXRegisterInfo::getRARegister() const { return 0; } diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index 595178335ae2..d40682066142 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -17,7 +17,6 @@ #include "ManagedStringPool.h" #include "llvm/Target/TargetRegisterInfo.h" - #define GET_REGINFO_HEADER #include "NVPTXGenRegisterInfo.inc" #include "llvm/Target/TargetRegisterInfo.h" @@ -33,34 +32,28 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo { private: bool Is64Bit; // Hold Strings that can be free'd all together with NVPTXRegisterInfo - ManagedStringPool ManagedStrPool; + ManagedStringPool ManagedStrPool; public: - NVPTXRegisterInfo(const TargetInstrInfo &tii, - const NVPTXSubtarget &st); - + NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st); //------------------------------------------------------ // Pure virtual functions from TargetRegisterInfo //------------------------------------------------------ // NVPTX callee saved registers - virtual const uint16_t* + virtual const uint16_t * getCalleeSavedRegs(const MachineFunction *MF = 0) const; // NVPTX callee saved register classes - virtual const TargetRegisterClass* const * + virtual const TargetRegisterClass *const * getCalleeSavedRegClasses(const MachineFunction *MF) const; virtual BitVector getReservedRegs(const MachineFunction &MF) const; - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, - RegScavenger *RS=NULL) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = NULL) const; virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; virtual unsigned getFrameRegister(const MachineFunction &MF) const; @@ -78,15 +71,9 @@ public: }; - -std::string getNVPTXRegClassName (const TargetRegisterClass *RC); -std::string getNVPTXRegClassStr (const TargetRegisterClass *RC); -bool isNVPTXVectorRegClass (const TargetRegisterClass *RC); -std::string getNVPTXElemClassName (const TargetRegisterClass *RC); -int getNVPTXVectorSize (const TargetRegisterClass *RC); -const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC); +std::string getNVPTXRegClassName(const TargetRegisterClass *RC); +std::string getNVPTXRegClassStr(const TargetRegisterClass *RC); } // end namespace llvm - #endif diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td index ba158258b994..8d100d631683 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -37,9 +37,6 @@ foreach i = 0-395 in { def RL#i : NVPTXReg<"%rl"#i>; // 64-bit def F#i : NVPTXReg<"%f"#i>; // 
32-bit float def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float - // Vectors - foreach s = [ "2b8", "2b16", "2b32", "2b64", "4b8", "4b16", "4b32" ] in - def v#s#_#i : NVPTXReg<"%v"#s#"_"#i>; // Arguments def ia#i : NVPTXReg<"%ia"#i>; @@ -65,44 +62,3 @@ def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>; // Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used. def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>; - -class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList, - NVPTXRegClass sClass, - int e, - string n> - : NVPTXRegClass<regTypes, alignment, regList> -{ - NVPTXRegClass scalarClass=sClass; - int elems=e; - string name=n; -} -def V2F32Regs - : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)), - Float32Regs, 2, ".v2.f32">; -def V4F32Regs - : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)), - Float32Regs, 4, ".v4.f32">; -def V2I32Regs - : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)), - Int32Regs, 2, ".v2.u32">; -def V4I32Regs - : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)), - Int32Regs, 4, ".v4.u32">; -def V2F64Regs - : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)), - Float64Regs, 2, ".v2.f64">; -def V2I64Regs - : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)), - Int64Regs, 2, ".v2.u64">; -def V2I16Regs - : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)), - Int16Regs, 2, ".v2.u16">; -def V4I16Regs - : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)), - Int16Regs, 4, ".v4.u16">; -def V2I8Regs - : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)), - Int8Regs, 2, ".v2.u8">; -def V4I8Regs - : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)), - Int8Regs, 4, ".v4.u8">; diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h index f1ca466266f6..e166be5a68e4 100644 --- a/lib/Target/NVPTX/NVPTXSection.h +++ b/lib/Target/NVPTX/NVPTXSection.h @@ -14,8 +14,8 @@ #ifndef LLVM_NVPTXSECTION_H #define LLVM_NVPTXSECTION_H +#include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCSection.h" -#include "llvm/GlobalVariable.h" #include <vector> namespace llvm { @@ -38,6 +38,8 @@ public: virtual bool isBaseAddressKnownZero() const { return true; } virtual bool UseCodeAlign() const { return false; } virtual bool isVirtualSection() const { return false; } + virtual std::string getLabelBeginName() const { return ""; } + virtual std::string getLabelEndName() const { return ""; } }; } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp index 2836cad4f021..83dfe120899a 100644 --- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp +++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp @@ -11,19 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Support/InstIterator.h" -#include "NVPTXUtilities.h" #include "NVPTXSplitBBatBar.h" +#include "NVPTXUtilities.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/InstIterator.h" using namespace llvm; -namespace llvm { -FunctionPass *createSplitBBatBarPass(); -} +namespace llvm { FunctionPass *createSplitBBatBarPass(); } char NVPTXSplitBBatBar::ID = 0; @@ -72,6 +70,4 @@ 
bool NVPTXSplitBBatBar::runOnFunction(Function &F) { // This interface will most likely not be necessary, because this pass will // not be invoked by the driver, but will be used as a prerequisite to // another pass. -FunctionPass *llvm::createSplitBBatBarPass() { - return new NVPTXSplitBBatBar(); -} +FunctionPass *llvm::createSplitBBatBarPass() { return new NVPTXSplitBBatBar(); } diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.h b/lib/Target/NVPTX/NVPTXSplitBBatBar.h index 9e4d5a066d4c..bdafba9075a0 100644 --- a/lib/Target/NVPTX/NVPTXSplitBBatBar.h +++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.h @@ -15,8 +15,8 @@ #ifndef NVPTX_SPLIT_BB_AT_BAR_H #define NVPTX_SPLIT_BB_AT_BAR_H -#include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 7b62cce2c65c..2dcd73dcff9c 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -22,27 +22,23 @@ using namespace llvm; // Select Driver Interface #include "llvm/Support/CommandLine.h" namespace { -cl::opt<NVPTX::DrvInterface> -DriverInterface(cl::desc("Choose driver interface:"), - cl::values( - clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"), - clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"), - clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), - clEnumValEnd), - cl::init(NVPTX::NVCL)); +cl::opt<NVPTX::DrvInterface> DriverInterface( + cl::desc("Choose driver interface:"), + cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"), + clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"), + clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd), + cl::init(NVPTX::NVCL)); } NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) -: NVPTXGenSubtargetInfo(TT, CPU, FS), - Is64Bit(is64Bit), - PTXVersion(0), - SmVersion(10) { + : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0), + SmVersion(20) { drvInterface = DriverInterface; // Provide the default CPU if none - std::string defCPU = "sm_10"; + std::string defCPU = "sm_20"; ParseSubtargetFeatures((CPU.empty() ? 
defCPU : CPU), FS); diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index 3cfd9718e541..670077daaa69 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -14,8 +14,8 @@ #ifndef NVPTXSUBTARGET_H #define NVPTXSUBTARGET_H -#include "llvm/Target/TargetSubtargetInfo.h" #include "NVPTX.h" +#include "llvm/Target/TargetSubtargetInfo.h" #define GET_SUBTARGETINFO_HEADER #include "NVPTXGenSubtargetInfo.inc" @@ -25,7 +25,7 @@ namespace llvm { class NVPTXSubtarget : public NVPTXGenSubtargetInfo { - + std::string TargetName; NVPTX::DrvInterface drvInterface; bool Is64Bit; @@ -57,16 +57,14 @@ public: bool hasF32FTZ() const { return SmVersion >= 20; } bool hasFMAF32() const { return SmVersion >= 20; } bool hasFMAF64() const { return SmVersion >= 13; } + bool hasLDG() const { return SmVersion >= 32; } bool hasLDU() const { return SmVersion >= 20; } bool hasGenericLdSt() const { return SmVersion >= 20; } inline bool hasHWROT32() const { return false; } - inline bool hasSWROT32() const { - return true; - } - inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; } + inline bool hasSWROT32() const { return true; } + inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); } inline bool hasROT64() const { return SmVersion >= 20; } - bool is64Bit() const { return Is64Bit; } unsigned int getSmVersion() const { return SmVersion; } @@ -95,4 +93,4 @@ public: } // End llvm namespace -#endif // NVPTXSUBTARGET_H +#endif // NVPTXSUBTARGET_H diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index cbb490003d37..67ca6b58e5a6 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -12,27 +12,30 @@ //===----------------------------------------------------------------------===// #include "NVPTXTargetMachine.h" -#include "NVPTX.h" -#include "NVPTXSplitBBatBar.h" -#include "NVPTXLowerAggrCopies.h" #include "MCTargetDesc/NVPTXMCAsmInfo.h" +#include "NVPTX.h" #include "NVPTXAllocaHoisting.h" -#include "llvm/PassManager.h" +#include "NVPTXLowerAggrCopies.h" +#include "NVPTXSplitBBatBar.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -41,14 +44,12 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/TargetRegistry.h" - using namespace llvm; +namespace llvm { +void initializeNVVMReflectPass(PassRegistry&); +} extern "C" void LLVMInitializeNVPTXTarget() { // Register the target. 
@@ -58,53 +59,42 @@ extern "C" void LLVMInitializeNVPTXTarget() { RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32); RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64); + // FIXME: This pass is really intended to be invoked during IR optimization, + // but it's very NVPTX-specific. + initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); } -NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, - StringRef TT, - StringRef CPU, - StringRef FS, - const TargetOptions& Options, - Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL, - bool is64bit) -: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, is64bit), - DL(Subtarget.getDataLayout()), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit), - STTI(&TLInfo), VTTI(&TLInfo) -/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { -} - - +NVPTXTargetMachine::NVPTXTargetMachine( + const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64bit) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), + FrameLowering( + *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {} void NVPTXTargetMachine32::anchor() {} -NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) -: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { -} +NVPTXTargetMachine32::NVPTXTargetMachine32( + const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} void NVPTXTargetMachine64::anchor() {} -NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) -: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { -} - +NVPTXTargetMachine64::NVPTXTargetMachine64( + const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} namespace llvm { class NVPTXPassConfig : public TargetPassConfig { public: NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) {} NVPTXTargetMachine &getNVPTXTargetMachine() const { return getTM<NVPTXTargetMachine>(); @@ -125,10 +115,7 @@ bool NVPTXPassConfig::addInstSelector() { addPass(createSplitBBatBarPass()); addPass(createAllocaHoisting()); addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); - addPass(createVectorElementizePass(getNVPTXTargetMachine())); return false; } -bool NVPTXPassConfig::addPreRegAlloc() { - return false; -} +bool NVPTXPassConfig::addPreRegAlloc() { return false; } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 11bc9d4fa698..5fbcf735b48f 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -11,77 +11,64 @@ // //===----------------------------------------------------------------------===// - #ifndef 
NVPTX_TARGETMACHINE_H #define NVPTX_TARGETMACHINE_H -#include "NVPTXInstrInfo.h" +#include "ManagedStringPool.h" +#include "NVPTXFrameLowering.h" #include "NVPTXISelLowering.h" +#include "NVPTXInstrInfo.h" #include "NVPTXRegisterInfo.h" #include "NVPTXSubtarget.h" -#include "NVPTXFrameLowering.h" -#include "ManagedStringPool.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSelectionDAGInfo.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { /// NVPTXTargetMachine /// class NVPTXTargetMachine : public LLVMTargetMachine { - NVPTXSubtarget Subtarget; - const DataLayout DL; // Calculates type size & alignment - NVPTXInstrInfo InstrInfo; - NVPTXTargetLowering TLInfo; - TargetSelectionDAGInfo TSInfo; + NVPTXSubtarget Subtarget; + const DataLayout DL; // Calculates type size & alignment + NVPTXInstrInfo InstrInfo; + NVPTXTargetLowering TLInfo; + TargetSelectionDAGInfo TSInfo; // NVPTX does not have any call stack frame, but needs an NVPTX-specific // FrameLowering class because TargetFrameLowering is abstract. - NVPTXFrameLowering FrameLowering; + NVPTXFrameLowering FrameLowering; // Hold Strings that can be free'd all together with NVPTXTargetMachine - ManagedStringPool ManagedStrPool; - - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; + ManagedStringPool ManagedStrPool; //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, // bool DisableVerify, MCContext *&OutCtx); public: - NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OP, - bool is64bit); + NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; } - virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const DataLayout *getDataLayout() const { return &DL;} - virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;} + virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const DataLayout *getDataLayout() const { return &DL; } + virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; } virtual const NVPTXRegisterInfo *getRegisterInfo() const { return &(InstrInfo.getRegisterInfo()); } virtual NVPTXTargetLowering *getTargetLowering() const { - return const_cast<NVPTXTargetLowering*>(&TLInfo); + return const_cast<NVPTXTargetLowering *>(&TLInfo); } virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } //virtual bool addInstSelector(PassManagerBase &PM, // CodeGenOpt::Level OptLevel); @@ -89,22 +76,19 @@ public: //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level); ManagedStringPool *getManagedStrPool() const { - return const_cast<ManagedStringPool*>(&ManagedStrPool); + return const_cast<ManagedStringPool *>(&ManagedStrPool); } virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); // Emission of machine code through JITCodeEmitter is not supported.
- virtual bool addPassesToEmitMachineCode(PassManagerBase &, - JITCodeEmitter &, + virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, bool = true) { return true; } // Emission of machine code through MCJIT is not supported. - virtual bool addPassesToEmitMC(PassManagerBase &, - MCContext *&, - raw_ostream &, + virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, bool = true) { return true; } @@ -129,7 +113,6 @@ public: CodeGenOpt::Level OL); }; - } // end namespace llvm #endif diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h index b5698a2fc08f..6ab0e08ad091 100644 --- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -46,45 +46,43 @@ public: } virtual void Initialize(MCContext &ctx, const TargetMachine &TM) { - TextSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getText()); - DataSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getDataRel()); - BSSSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getBSS()); - ReadOnlySection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getReadOnly()); + TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText()); + DataSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel()); + BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS()); + ReadOnlySection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly()); - StaticCtorSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - StaticDtorSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - LSDASection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - EHFrameSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfLineSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfStrSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfLocSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); + StaticCtorSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + StaticDtorSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + LSDASection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + EHFrameSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfAbbrevSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfInfoSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfLineSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfFrameSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + 
DwarfPubTypesSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfDebugInlineSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfStrSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfLocSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfARangesSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfRangesSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfMacroInfoSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); } virtual const MCSection *getSectionForConstant(SectionKind Kind) const { @@ -93,8 +91,7 @@ public: virtual const MCSection * getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, - const TargetMachine &TM) const { + Mangler *Mang, const TargetMachine &TM) const { return DataSection; } diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index 3f52251cc1b2..6786eb02240c 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -12,11 +12,11 @@ #include "NVPTXUtilities.h" #include "NVPTX.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Function.h" -#include "llvm/Module.h" -#include "llvm/Constants.h" -#include "llvm/Operator.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include <algorithm> #include <cstring> #include <map> @@ -34,7 +34,6 @@ typedef std::map<const Module *, global_val_annot_t> per_module_annot_t; ManagedStatic<per_module_annot_t> annotationCache; - static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { assert(md && "Invalid mdnode for annotation"); assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands"); @@ -46,7 +45,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { assert(prop && "Annotation property not a string"); // value - ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1)); + ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1)); assert(Val && "Value operand not a constant int"); std::string keyname = prop->getString().str(); @@ -120,9 +119,9 @@ bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop, bool llvm::isTexture(const llvm::Value &val) { if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { unsigned annot; - if (llvm::findOneNVVMAnnotation(gv, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE], - annot)) { + if (llvm::findOneNVVMAnnotation( + gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE], + annot)) { assert((annot == 1) && "Unexpected annotation on a texture symbol"); return true; } @@ -133,9 +132,9 @@ bool llvm::isTexture(const llvm::Value &val) { bool llvm::isSurface(const llvm::Value &val) { if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { unsigned annot; - if (llvm::findOneNVVMAnnotation(gv, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE], - annot)) { + if (llvm::findOneNVVMAnnotation( + gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE], + annot)) { assert((annot == 1) && "Unexpected annotation on a surface symbol"); return true; } @@ -146,9 +145,9 @@ bool llvm::isSurface(const llvm::Value &val) { bool llvm::isSampler(const llvm::Value &val) { if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { unsigned annot; - if (llvm::findOneNVVMAnnotation(gv, 
- llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], - annot)) { + if (llvm::findOneNVVMAnnotation( + gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], + annot)) { assert((annot == 1) && "Unexpected annotation on a sampler symbol"); return true; } @@ -156,9 +155,9 @@ bool llvm::isSampler(const llvm::Value &val) { if (const Argument *arg = dyn_cast<Argument>(&val)) { const Function *func = arg->getParent(); std::vector<unsigned> annot; - if (llvm::findAllNVVMAnnotation(func, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], - annot)) { + if (llvm::findAllNVVMAnnotation( + func, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], + annot)) { if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) return true; } @@ -171,8 +170,9 @@ bool llvm::isImageReadOnly(const llvm::Value &val) { const Function *func = arg->getParent(); std::vector<unsigned> annot; if (llvm::findAllNVVMAnnotation(func, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM], - annot)) { + llvm::PropertyAnnotationNames[ + llvm::PROPERTY_ISREADONLY_IMAGE_PARAM], + annot)) { if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) return true; } @@ -185,8 +185,9 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) { const Function *func = arg->getParent(); std::vector<unsigned> annot; if (llvm::findAllNVVMAnnotation(func, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM], - annot)) { + llvm::PropertyAnnotationNames[ + llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM], + annot)) { if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) return true; } @@ -214,52 +215,44 @@ std::string llvm::getSamplerName(const llvm::Value &val) { } bool llvm::getMaxNTIDx(const Function &F, unsigned &x) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], - x)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x)); } bool llvm::getMaxNTIDy(const Function &F, unsigned &y) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], - y)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y)); } bool llvm::getMaxNTIDz(const Function &F, unsigned &z) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], - z)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z)); } bool llvm::getReqNTIDx(const Function &F, unsigned &x) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], - x)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x)); } bool llvm::getReqNTIDy(const Function &F, unsigned &y) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], - y)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y)); } bool llvm::getReqNTIDz(const Function &F, unsigned &z) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], - z)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z)); } bool llvm::getMinCTASm(const Function &F, unsigned &x) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], - x)); + 
return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x)); } bool llvm::isKernelFunction(const Function &F) { unsigned x = 0; - bool retval = llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], - x); + bool retval = llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x); if (retval == false) { // There is no NVVM metadata, check the calling convention if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel) @@ -267,20 +260,19 @@ bool llvm::isKernelFunction(const Function &F) { else return false; } - return (x==1); + return (x == 1); } bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) { std::vector<unsigned> Vs; - bool retval = llvm::findAllNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], - Vs); + bool retval = llvm::findAllNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs); if (retval == false) return false; - for (int i=0, e=Vs.size(); i<e; i++) { + for (int i = 0, e = Vs.size(); i < e; i++) { unsigned v = Vs[i]; - if ( (v >> 16) == index ) { - align = v & 0xFFFF; + if ((v >> 16) == index) { + align = v & 0xFFFF; return true; } } @@ -289,16 +281,15 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) { bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) { if (MDNode *alignNode = I.getMetadata("callalign")) { - for (int i=0, n = alignNode->getNumOperands(); - i<n; i++) { + for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) { if (const ConstantInt *CI = - dyn_cast<ConstantInt>(alignNode->getOperand(i))) { + dyn_cast<ConstantInt>(alignNode->getOperand(i))) { unsigned v = CI->getZExtValue(); - if ( (v>>16) == index ) { + if ((v >> 16) == index) { align = v & 0xFFFF; return true; } - if ( (v>>16) > index ) { + if ((v >> 16) > index) { return false; } } @@ -337,8 +328,8 @@ bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) { // consider several special intrinsics in stripping pointer casts, and // provide an option to ignore GEP indices to find out the base address only // which could be used in simple alias disambiguation. -const Value *llvm::skipPointerTransfer(const Value *V, - bool ignore_GEP_indices) { +const Value * +llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) { V = V->stripPointerCasts(); while (true) { if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) { @@ -360,8 +351,8 @@ const Value *llvm::skipPointerTransfer(const Value *V, // - ignore GEP indices to find out the base address only, and // - tracking PHINode // which could be used in simple alias disambiguation.
-const Value *llvm::skipPointerTransfer(const Value *V, - std::set<const Value *> &processed) { +const Value * +llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) { if (processed.find(V) != processed.end()) return NULL; processed.insert(V); @@ -406,7 +397,6 @@ const Value *llvm::skipPointerTransfer(const Value *V, return V; } - // The following are some useful utilities for debugging BasicBlock *llvm::getParentBlock(Value *v) { diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h index fe6ad559e9df..a208004297d0 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.h +++ b/lib/Target/NVPTX/NVPTXUtilities.h @@ -14,17 +14,16 @@ #ifndef NVPTXUTILITIES_H #define NVPTXUTILITIES_H -#include "llvm/Value.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" #include <cstdarg> #include <set> #include <string> #include <vector> -namespace llvm -{ +namespace llvm { #define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly" #define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly" @@ -64,8 +63,7 @@ bool isBarrierIntrinsic(llvm::Intrinsic::ID); /// to pass into type construction of CallInst ctors. This turns a null /// terminated list of pointers (or other value types) into a real live vector. /// -template<typename T> -inline std::vector<T> make_vector(T A, ...) { +template <typename T> inline std::vector<T> make_vector(T A, ...) { va_list Args; va_start(Args, A); std::vector<T> Result; @@ -78,8 +76,8 @@ inline std::vector<T> make_vector(T A, ...) { bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id); const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices); -const Value *skipPointerTransfer(const Value *V, - std::set<const Value *> &processed); +const Value * +skipPointerTransfer(const Value *V, std::set<const Value *> &processed); BasicBlock *getParentBlock(Value *v); Function *getParentFunction(Value *v); void dumpBlock(Value *v, char *blockName); diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp index 6a0e5328f62f..5f074b33a2d4 100644 --- a/lib/Target/NVPTX/NVPTXutil.cpp +++ b/lib/Target/NVPTX/NVPTXutil.cpp @@ -18,8 +18,7 @@ using namespace llvm; namespace llvm { -bool isParamLoad(const MachineInstr *MI) -{ +bool isParamLoad(const MachineInstr *MI) { if ((MI->getOpcode() != NVPTX::LD_i32_avar) && (MI->getOpcode() != NVPTX::LD_i64_avar)) return false; @@ -30,13 +29,11 @@ bool isParamLoad(const MachineInstr *MI) return true; } -#define DATA_MASK 0x7f -#define DIGIT_WIDTH 7 -#define MORE_BYTES 0x80 +#define DATA_MASK 0x7f +#define DIGIT_WIDTH 7 +#define MORE_BYTES 0x80 -static int encode_leb128(uint64_t val, int *nbytes, - char *space, int splen) -{ +static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) { char *a; char *end = space + splen; @@ -61,29 +58,30 @@ static int encode_leb128(uint64_t val, int *nbytes, #undef DIGIT_WIDTH #undef MORE_BYTES -uint64_t encode_leb128(const char *str) -{ - union { uint64_t x; char a[8]; } temp64; +uint64_t encode_leb128(const char *str) { + union { + uint64_t x; + char a[8]; + } temp64; temp64.x = 0; - for (unsigned i=0,e=strlen(str); i!=e; ++i) - temp64.a[i] = str[e-1-i]; + for (unsigned i = 0, e = strlen(str); i != e; ++i) + temp64.a[i] = str[e - 1 - i]; char encoded[16]; int nbytes; int retval = encode_leb128(temp64.x, &nbytes, encoded, 16); - (void)retval; -
assert(retval == 0 && - "Encoding to leb128 failed"); + (void) retval; + assert(retval == 0 && "Encoding to leb128 failed"); assert(nbytes <= 8 && "Cannot support register names with leb128 encoding > 8 bytes"); temp64.x = 0; - for (int i=0; i<nbytes; ++i) + for (int i = 0; i < nbytes; ++i) temp64.a[i] = encoded[i]; return temp64.x; diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp new file mode 100644 index 000000000000..0ad62ce39b0d --- /dev/null +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -0,0 +1,177 @@ +//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces occurrences of __nvvm_reflect("string") with an +// integer based on the -nvvm-reflect-list string=<int> option given to this +// pass. If an undefined string value is seen in a call to +// __nvvm_reflect("string"), a default value of 0 will be used. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Pass.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include <map> +#include <sstream> +#include <string> +#include <vector> + +#define NVVM_REFLECT_FUNCTION "__nvvm_reflect" + +using namespace llvm; + +namespace llvm { void initializeNVVMReflectPass(PassRegistry &); } + +namespace { +class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass { +private: + StringMap<int> VarMap; + typedef DenseMap<std::string, int>::iterator VarMapIter; + Function *ReflectFunction; + +public: + static char ID; + NVVMReflect() : ModulePass(ID) { + VarMap.clear(); + ReflectFunction = 0; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } + virtual bool runOnModule(Module &); + + void setVarMap(); +}; +} + +static cl::opt<bool> +NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), + cl::desc("NVVM reflection, enabled by default")); + +char NVVMReflect::ID = 0; +INITIALIZE_PASS(NVVMReflect, "nvvm-reflect", + "Replace occurrences of __nvvm_reflect() calls with 0/1", false, + false) + +static cl::list<std::string> +ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"), + cl::desc("A list of string=num assignments"), + cl::ValueRequired); + +/// The command line can look as follows: +/// -nvvm-reflect-list a=1,b=2 -nvvm-reflect-list c=3,d=0 -R e=2 +/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the +/// ReflectList vector. First, each of ReflectList[i] is split +/// using "," as the delimiter. Then each part is split +/// using "=" as the delimiter.
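// The two-level split just described is easy to check in isolation; a
// minimal freestanding sketch, using std::stringstream in place of
// StringRef::split (the option text "a=1,b=2" is a made-up example):

#include <cassert>
#include <cstdlib>
#include <map>
#include <sstream>
#include <string>

static std::map<std::string, int> parseReflectList(const std::string &opt) {
  std::map<std::string, int> vars;
  std::stringstream list(opt);
  std::string pair;
  while (std::getline(list, pair, ',')) {       // split on ","
    std::string::size_type eq = pair.find('='); // then on "="
    assert(eq != std::string::npos && "name=val expected");
    vars[pair.substr(0, eq)] = std::atoi(pair.c_str() + eq + 1);
  }
  return vars;
}

int main() {
  std::map<std::string, int> m = parseReflectList("a=1,b=2");
  assert(m["a"] == 1 && m["b"] == 2);
  return 0;
}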
+void NVVMReflect::setVarMap() { + for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) { + DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n"); + SmallVector<StringRef, 4> NameValList; + StringRef(ReflectList[i]).split(NameValList, ","); + for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) { + SmallVector<StringRef, 2> NameValPair; + NameValList[j].split(NameValPair, "="); + assert(NameValPair.size() == 2 && "name=val expected"); + std::stringstream ValStream(NameValPair[1]); + int Val; + ValStream >> Val; + assert((!(ValStream.fail())) && "integer value expected"); + VarMap[NameValPair[0]] = Val; + } + } +} + +bool NVVMReflect::runOnModule(Module &M) { + if (!NVVMReflectEnabled) + return false; + + setVarMap(); + + ReflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION); + + // If the reflect function is not used, then there will be + // no entry in the module. + if (ReflectFunction == 0) + return false; + + // Validate the _reflect function + assert(ReflectFunction->isDeclaration() && + "_reflect function should not have a body"); + assert(ReflectFunction->getReturnType()->isIntegerTy() && + "_reflect's return type should be integer"); + + std::vector<Instruction *> ToRemove; + + // Go through the uses of ReflectFunction in this module. + // Each of them should be a CallInst with a ConstantArray argument. + // First validate that. If the c-string corresponding to the + // ConstantArray can be found successfully, see if it can be + // found in VarMap. If so, replace the uses of CallInst with the + // value found in VarMap. If not, replace the use with value 0. + for (Value::use_iterator I = ReflectFunction->use_begin(), + E = ReflectFunction->use_end(); + I != E; ++I) { + assert(isa<CallInst>(*I) && "Only a call instruction can use _reflect"); + CallInst *Reflect = cast<CallInst>(*I); + + assert((Reflect->getNumOperands() == 2) && + "Only one operand expected for _reflect function"); + // In CUDA, we will have an extra constant-to-generic conversion of + // the string.
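// At the source level, the pattern this pass folds looks roughly like the
// sketch below. "USE_FAST" is a made-up variable name, and the stub
// definition exists only so the sketch links and runs on the host; in
// device code __nvvm_reflect is declaration-only, and this pass replaces
// each call with the value supplied via -nvvm-reflect-list (default 0),
// after which the dead branch folds away:

#include <cstdio>

extern "C" int __nvvm_reflect(const char *) { return 0; } // host stub

static float halve(float x) {
  if (__nvvm_reflect("USE_FAST"))
    return x * 0.5f; // survives when USE_FAST=1 is supplied
  return x / 2.0f;   // survives under the default value of 0
}

int main() {
  std::printf("%f\n", halve(3.0f));
  return 0;
}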
+ const Value *conv = Reflect->getArgOperand(0); + assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion"); + const CallInst *ConvCall = cast<CallInst>(conv); + const Value *str = ConvCall->getArgOperand(0); + assert(isa<ConstantExpr>(str) && + "Format of _reflect function not recognized"); + const ConstantExpr *GEP = cast<ConstantExpr>(str); + + const Value *Sym = GEP->getOperand(0); + assert(isa<Constant>(Sym) && "Format of _reflect function not recognized"); + + const Constant *SymStr = cast<Constant>(Sym); + + assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) && + "Format of _reflect function not recognized"); + + assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() && + "Format of _reflect function not recognized"); + + std::string ReflectArg = + cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString(); + + ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1); + DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n"); + + int ReflectVal = 0; // The default value is 0 + if (VarMap.find(ReflectArg) != VarMap.end()) { + ReflectVal = VarMap[ReflectArg]; + } + Reflect->replaceAllUsesWith( + ConstantInt::get(Reflect->getType(), ReflectVal)); + ToRemove.push_back(Reflect); + } + if (ToRemove.size() == 0) + return false; + + for (unsigned i = 0, e = ToRemove.size(); i != e; ++i) + ToRemove[i]->eraseFromParent(); + return true; +} diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp index f3624b9f23c7..cc7d4dc5ece7 100644 --- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp +++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "NVPTX.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -17,7 +17,7 @@ Target llvm::TheNVPTXTarget64; extern "C" void LLVMInitializeNVPTXTargetInfo() { RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx", - "NVIDIA PTX 32-bit"); + "NVIDIA PTX 32-bit"); RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64", - "NVIDIA PTX 64-bit"); + "NVIDIA PTX 64-bit"); } diff --git a/lib/Target/NVPTX/VectorElementize.cpp b/lib/Target/NVPTX/VectorElementize.cpp deleted file mode 100644 index 8043e2de0972..000000000000 --- a/lib/Target/NVPTX/VectorElementize.cpp +++ /dev/null @@ -1,1248 +0,0 @@ -//===-- VectorElementize.cpp - Remove unreachable blocks for codegen --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass converts operations on vector types to operations on their -// element types. -// -// For generic binary and unary vector instructions, the conversion is simple. -// Suppose we have -// av = bv Vop cv -// where av, bv, and cv are vector virtual registers, and Vop is a vector op. -// This gets converted to the following : -// a1 = b1 Sop c1 -// a2 = b2 Sop c2 -// -// VectorToScalarMap maintains the vector vreg to scalar vreg mapping. -// For the above example, the map will look as follows: -// av => [a1, a2] -// bv => [b1, b2] -// -// In addition, initVectorInfo creates the following opcode->opcode map. -// Vop => Sop -// OtherVop => OtherSop -// ... -// -// For vector specific instructions like vecbuild, vecshuffle etc, the -// conversion is different. 
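// The core rewrite described above, as a freestanding toy: a two-wide
// vector op av = bv Vop cv becomes two scalar ops over the mapped element
// registers (a1,a2), (b1,b2), (c1,c2), here with integer addition
// standing in for Vop/Sop:

#include <cstdio>

struct V2 { int e0, e1; }; // stands in for a v2 virtual register

int main() {
  V2 b = {1, 2}, c = {10, 20}, a;
  a.e0 = b.e0 + c.e0; // a1 = b1 Sop c1
  a.e1 = b.e1 + c.e1; // a2 = b2 Sop c2
  std::printf("%d %d\n", a.e0, a.e1); // prints: 11 22
  return 0;
}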
Look at comments near the functions with -// prefix createVec<...>. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/Passes.h" -#include "llvm/Constant.h" -#include "llvm/Instructions.h" -#include "llvm/Function.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "NVPTX.h" -#include "NVPTXTargetMachine.h" - -using namespace llvm; - -namespace { - -class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass { - virtual bool runOnMachineFunction(MachineFunction &F); - - NVPTXTargetMachine &TM; - MachineRegisterInfo *MRI; - const NVPTXRegisterInfo *RegInfo; - const NVPTXInstrInfo *InstrInfo; - - llvm::DenseMap<const TargetRegisterClass *, const TargetRegisterClass *> - RegClassMap; - llvm::DenseMap<unsigned, bool> SimpleMoveMap; - - llvm::DenseMap<unsigned, SmallVector<unsigned, 4> > VectorToScalarMap; - - bool isVectorInstr(MachineInstr *); - - SmallVector<unsigned, 4> getScalarRegisters(unsigned); - unsigned getScalarVersion(unsigned); - unsigned getScalarVersion(MachineInstr *); - - bool isVectorRegister(unsigned); - const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC); - unsigned numCopiesNeeded(MachineInstr *); - - void createLoadCopy(MachineFunction&, MachineInstr *, - std::vector<MachineInstr *>&); - void createStoreCopy(MachineFunction&, MachineInstr *, - std::vector<MachineInstr *>&); - - void createVecDest(MachineFunction&, MachineInstr *, - std::vector<MachineInstr *>&); - - void createCopies(MachineFunction&, MachineInstr *, - std::vector<MachineInstr *>&); - - unsigned copyProp(MachineFunction&); - unsigned removeDeadMoves(MachineFunction&); - - void elementize(MachineFunction&); - - bool isSimpleMove(MachineInstr *); - - void createVecShuffle(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies); - - void createVecExtract(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies); - - void createVecInsert(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies); - - void createVecBuild(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies); - -public: - - static char ID; // Pass identification, replacement for typeid - VectorElementize(NVPTXTargetMachine &tm) - : MachineFunctionPass(ID), TM(tm) {} - - virtual const char *getPassName() const { - return "Convert LLVM vector types to their element types"; - } -}; - -char VectorElementize::ID = 1; -} - -static cl::opt<bool> -RemoveRedundantMoves("nvptx-remove-redundant-moves", - cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"), - cl::init(true)); - -#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \ - >> NVPTX::VecInstTypeShift) -#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP) -#define ISVECLOAD(x) (VECINST(x) == NVPTX::VecLoad) -#define ISVECSTORE(x) (VECINST(x) == NVPTX::VecStore) -#define ISVECBUILD(x) (VECINST(x) == NVPTX::VecBuild) -#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle) -#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract) -#define ISVECINSERT(x) 
(VECINST(x) == NVPTX::VecInsert) -#define ISVECDEST(x) (VECINST(x) == NVPTX::VecDest) - -bool VectorElementize::isSimpleMove(MachineInstr *mi) { - if (mi->isCopy()) - return true; - unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask) - >> NVPTX::SimpleMoveShift; - return (TSFlags == 1); -} - -bool VectorElementize::isVectorInstr(MachineInstr *mi) { - if ((mi->getOpcode() == NVPTX::PHI) || - (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) { - MachineOperand dest = mi->getOperand(0); - return isVectorRegister(dest.getReg()); - } - return ISVECINST(mi); -} - -unsigned VectorElementize::getScalarVersion(MachineInstr *mi) { - return getScalarVersion(mi->getOpcode()); -} - -///============================================================================= -///Instr is assumed to be a vector instruction. For most vector instructions, -///the size of the destination vector register gives the number of scalar copies -///needed. For VecStore, size of getOperand(1) gives the number of scalar copies -///needed. For VecExtract, the dest is a scalar. So getOperand(1) gives the -///number of scalar copies needed. -///============================================================================= -unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) { - unsigned numDefs=0; - unsigned def; - for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { - MachineOperand oper = Instr->getOperand(i); - - if (!oper.isReg()) continue; - if (!oper.isDef()) continue; - def = i; - numDefs++; - } - assert((numDefs <= 1) && "Only 0 or 1 defs supported"); - - if (numDefs == 1) { - unsigned regnum = Instr->getOperand(def).getReg(); - if (ISVECEXTRACT(Instr)) - regnum = Instr->getOperand(1).getReg(); - return getNVPTXVectorSize(MRI->getRegClass(regnum)); - } - else if (numDefs == 0) { - assert(ISVECSTORE(Instr) - && "Only 0 def instruction supported is vector store"); - - unsigned regnum = Instr->getOperand(0).getReg(); - return getNVPTXVectorSize(MRI->getRegClass(regnum)); - } - return 1; -} - -const TargetRegisterClass *VectorElementize:: -getScalarRegClass(const TargetRegisterClass *RC) { - assert(isNVPTXVectorRegClass(RC) && - "Not a vector register class"); - return getNVPTXElemClass(RC); -} - -bool VectorElementize::isVectorRegister(unsigned reg) { - const TargetRegisterClass *RC=MRI->getRegClass(reg); - return isNVPTXVectorRegClass(RC); -} - -///============================================================================= -///For every vector register 'v' that is not already in the VectorToScalarMap, -///create n scalar registers of the corresponding element type, where n -///is 2 or 4 (getNVPTXVectorSize) and add it VectorToScalarMap. -///============================================================================= -SmallVector<unsigned, 4> VectorElementize::getScalarRegisters(unsigned regnum) { - assert(isVectorRegister(regnum) && "Expecting a vector register here"); - // Create the scalar registers and put them in the map, if not already there. 
- if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) { - const TargetRegisterClass *vecClass = MRI->getRegClass(regnum); - const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass); - - SmallVector<unsigned, 4> temp; - - for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i) - temp.push_back(MRI->createVirtualRegister(scalarClass)); - - VectorToScalarMap[regnum] = temp; - } - return VectorToScalarMap[regnum]; -} - -///============================================================================= -///For a vector load of the form -///va <= ldv2 [addr] -///the following multi output instruction is created : -///[v1, v2] <= LD [addr] -///Look at NVPTXVector.td for the definitions of multi output loads. -///============================================================================= -void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - copies.push_back(F.CloneMachineInstr(Instr)); - - MachineInstr *copy=copies[0]; - copy->setDesc(InstrInfo->get(getScalarVersion(copy))); - - // Remove the dest, that should be a vector operand. - MachineOperand dest = copy->getOperand(0); - unsigned regnum = dest.getReg(); - - SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); - copy->RemoveOperand(0); - - std::vector<MachineOperand> otherOperands; - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - otherOperands.push_back(copy->getOperand(i)); - - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - copy->RemoveOperand(0); - - for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) { - copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true)); - } - - for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) - copy->addOperand(otherOperands[i]); - -} - -///============================================================================= -///For a vector store of the form -///stv2 va, [addr] -///the following multi input instruction is created : -///ST v1, v2, [addr] -///Look at NVPTXVector.td for the definitions of multi input stores. -///============================================================================= -void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - copies.push_back(F.CloneMachineInstr(Instr)); - - MachineInstr *copy=copies[0]; - copy->setDesc(InstrInfo->get(getScalarVersion(copy))); - - MachineOperand src = copy->getOperand(0); - unsigned regnum = src.getReg(); - - SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); - copy->RemoveOperand(0); - - std::vector<MachineOperand> otherOperands; - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - otherOperands.push_back(copy->getOperand(i)); - - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - copy->RemoveOperand(0); - - for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) - copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], false)); - - for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) - copy->addOperand(otherOperands[i]); -} - -///============================================================================= -///va <= shufflev2 vb, vc, <i1>, <i2> -///gets converted to 2 moves into a1 and a2. The source of the moves depend on -///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For -///shufflev4 the set is {0,..7}. 
For example, if i1=3, i2=0, the move -///instructions will be -///a1 <= c2 -///a2 <= b1 -///============================================================================= -void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - unsigned numcopies=numCopiesNeeded(Instr); - - unsigned destregnum = Instr->getOperand(0).getReg(); - unsigned src1regnum = Instr->getOperand(1).getReg(); - unsigned src2regnum = Instr->getOperand(2).getReg(); - - SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum); - SmallVector<unsigned, 4> src1 = getScalarRegisters(src1regnum); - SmallVector<unsigned, 4> src2 = getScalarRegisters(src2regnum); - - DebugLoc DL = Instr->getDebugLoc(); - - for (unsigned i=0; i<numcopies; i++) { - MachineInstr *copy = BuildMI(F, DL, - InstrInfo->get(getScalarVersion(Instr)), dest[i]); - MachineOperand which=Instr->getOperand(3+i); - assert(which.isImm() && "Shuffle operand not a constant"); - - int src=which.getImm(); - int elem=src%numcopies; - - if (which.getImm() < numcopies) - copy->addOperand(MachineOperand::CreateReg(src1[elem], false)); - else - copy->addOperand(MachineOperand::CreateReg(src2[elem], false)); - copies.push_back(copy); - } -} - -///============================================================================= -///a <= extractv2 va, <i1> -///gets turned into a simple move to the scalar register a. The source depends -///on i1. -///============================================================================= -void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - unsigned srcregnum = Instr->getOperand(1).getReg(); - - SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum); - - MachineOperand which = Instr->getOperand(2); - assert(which.isImm() && "Extract operand not a constant"); - - DebugLoc DL = Instr->getDebugLoc(); - - MachineInstr *copy = BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), - Instr->getOperand(0).getReg()); - copy->addOperand(MachineOperand::CreateReg(src[which.getImm()], false)); - - copies.push_back(copy); -} - -///============================================================================= -///va <= vecinsertv2 vb, c, <i1> -///This instruction copies all elements of vb to va, except the 'i1'th element. -///The scalar value c becomes the 'i1'th element of va. -///This gets translated to 2 (4 for vecinsertv4) moves. 
-///============================================================================= -void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - unsigned numcopies=numCopiesNeeded(Instr); - - unsigned destregnum = Instr->getOperand(0).getReg(); - unsigned srcregnum = Instr->getOperand(1).getReg(); - - SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum); - SmallVector<unsigned, 4> src = getScalarRegisters(srcregnum); - - MachineOperand which=Instr->getOperand(3); - assert(which.isImm() && "Insert operand not a constant"); - unsigned int elem=which.getImm(); - - DebugLoc DL = Instr->getDebugLoc(); - - for (unsigned i=0; i<numcopies; i++) { - MachineInstr *copy = BuildMI(F, DL, - InstrInfo->get(getScalarVersion(Instr)), dest[i]); - - if (i != elem) - copy->addOperand(MachineOperand::CreateReg(src[i], false)); - else - copy->addOperand(Instr->getOperand(2)); - - copies.push_back(copy); - } - -} - -///============================================================================= -///va <= buildv2 b1, b2 -///gets translated to -///a1 <= b1 -///a2 <= b2 -///============================================================================= -void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - unsigned numcopies=numCopiesNeeded(Instr); - - unsigned destregnum = Instr->getOperand(0).getReg(); - - SmallVector<unsigned, 4> dest = getScalarRegisters(destregnum); - - DebugLoc DL = Instr->getDebugLoc(); - - for (unsigned i=0; i<numcopies; i++) { - MachineInstr *copy = BuildMI(F, DL, - InstrInfo->get(getScalarVersion(Instr)), dest[i]); - - copy->addOperand(Instr->getOperand(1+i)); - - copies.push_back(copy); - } - -} - -///============================================================================= -///For a tex inst of the form -///va <= op [scalar operands] -///the following multi output instruction is created : -///[v1, v2] <= op' [scalar operands] -///============================================================================= -void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - copies.push_back(F.CloneMachineInstr(Instr)); - - MachineInstr *copy=copies[0]; - copy->setDesc(InstrInfo->get(getScalarVersion(copy))); - - // Remove the dest, that should be a vector operand. - MachineOperand dest = copy->getOperand(0); - unsigned regnum = dest.getReg(); - - SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); - copy->RemoveOperand(0); - - std::vector<MachineOperand> otherOperands; - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - otherOperands.push_back(copy->getOperand(i)); - - for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) - copy->RemoveOperand(0); - - for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) - copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true)); - - for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) - copy->addOperand(otherOperands[i]); -} - -///============================================================================= -///Look at the vector instruction type and dispatch to the createVec<...> -///function that creates the scalar copies. 
-///============================================================================= -void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr, - std::vector<MachineInstr *>& copies) { - if (ISVECLOAD(Instr)) { - createLoadCopy(F, Instr, copies); - return; - } - if (ISVECSTORE(Instr)) { - createStoreCopy(F, Instr, copies); - return; - } - if (ISVECSHUFFLE(Instr)) { - createVecShuffle(F, Instr, copies); - return; - } - if (ISVECEXTRACT(Instr)) { - createVecExtract(F, Instr, copies); - return; - } - if (ISVECINSERT(Instr)) { - createVecInsert(F, Instr, copies); - return; - } - if (ISVECDEST(Instr)) { - createVecDest(F, Instr, copies); - return; - } - if (ISVECBUILD(Instr)) { - createVecBuild(F, Instr, copies); - return; - } - - unsigned numcopies=numCopiesNeeded(Instr); - - for (unsigned i=0; i<numcopies; ++i) - copies.push_back(F.CloneMachineInstr(Instr)); - - for (unsigned i=0; i<numcopies; ++i) { - MachineInstr *copy = copies[i]; - - std::vector<MachineOperand> allOperands; - std::vector<bool> isDef; - - for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) { - MachineOperand oper = copy->getOperand(j); - allOperands.push_back(oper); - if (oper.isReg()) - isDef.push_back(oper.isDef()); - else - isDef.push_back(false); - } - - for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) - copy->RemoveOperand(0); - - copy->setDesc(InstrInfo->get(getScalarVersion(Instr))); - - for (unsigned j=0, e=allOperands.size(); j!=e; ++j) { - MachineOperand oper=allOperands[j]; - if (oper.isReg()) { - unsigned regnum = oper.getReg(); - if (isVectorRegister(regnum)) { - - SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); - copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef[j])); - } - else - copy->addOperand(oper); - } - else - copy->addOperand(oper); - } - } -} - -///============================================================================= -///Scan through all basic blocks, looking for vector instructions. -///For each vector instruction I, insert the scalar copies before I, and -///add I into toRemove vector. Finally remove all instructions in toRemove. -///============================================================================= -void VectorElementize::elementize(MachineFunction &F) { - for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); - BI!=BE; ++BI) { - MachineBasicBlock *BB = &*BI; - - std::vector<MachineInstr *> copies; - std::vector<MachineInstr *> toRemove; - - for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); - II!=IE; ++II) { - MachineInstr *Instr = &*II; - - if (!isVectorInstr(Instr)) - continue; - - copies.clear(); - createCopies(F, Instr, copies); - for (unsigned i=0, e=copies.size(); i!=e; ++i) - BB->insert(II, copies[i]); - - assert((copies.size() > 0) && "Problem in createCopies"); - toRemove.push_back(Instr); - } - for (unsigned i=0, e=toRemove.size(); i!=e; ++i) - F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i])); - } -} - -///============================================================================= -///a <= b -///... -///... -///x <= op(a, ...) -///gets converted to -/// -///x <= op(b, ...) -///The original move is still present. This works on SSA form machine code. -///Note that a <= b should be a simple vreg-to-vreg move instruction. -///TBD : I didn't find a function that can do replaceOperand, so I remove -///all operands and add all of them again, replacing the one while adding. 
-///============================================================================= -unsigned VectorElementize::copyProp(MachineFunction &F) { - unsigned numReplacements = 0; - - for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE; - ++BI) { - MachineBasicBlock *BB = &*BI; - - for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE; - ++II) { - MachineInstr *Instr = &*II; - - // Don't do copy propagation on PHI as it will cause unnecessary - // live range overlap. - if ((Instr->getOpcode() == TargetOpcode::PHI) || - (Instr->getOpcode() == TargetOpcode::DBG_VALUE)) - continue; - - bool needsReplacement = false; - - for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { - MachineOperand oper = Instr->getOperand(i); - if (!oper.isReg()) continue; - if (oper.isDef()) continue; - if (!RegInfo->isVirtualRegister(oper.getReg())) continue; - - MachineInstr *defInstr = MRI->getVRegDef(oper.getReg()); - - if (!defInstr) continue; - - if (!isSimpleMove(defInstr)) continue; - - MachineOperand defSrc = defInstr->getOperand(1); - if (!defSrc.isReg()) continue; - if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue; - - needsReplacement = true; - - } - if (!needsReplacement) continue; - - numReplacements++; - - std::vector<MachineOperand> operands; - - for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { - MachineOperand oper = Instr->getOperand(i); - bool flag = false; - do { - if (!(oper.isReg())) - break; - if (oper.isDef()) - break; - if (!(RegInfo->isVirtualRegister(oper.getReg()))) - break; - MachineInstr *defInstr = MRI->getVRegDef(oper.getReg()); - if (!(isSimpleMove(defInstr))) - break; - MachineOperand defSrc = defInstr->getOperand(1); - if (!(defSrc.isReg())) - break; - if (!(RegInfo->isVirtualRegister(defSrc.getReg()))) - break; - operands.push_back(defSrc); - flag = true; - } while (0); - if (flag == false) - operands.push_back(oper); - } - - for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) - Instr->RemoveOperand(0); - for (unsigned i=0, e=operands.size(); i!=e; ++i) - Instr->addOperand(operands[i]); - - } - } - return numReplacements; -} - -///============================================================================= -///Look for simple vreg-to-vreg instructions whose use_empty() is true, add -///them to deadMoves vector. Then remove all instructions in deadMoves. -///============================================================================= -unsigned VectorElementize::removeDeadMoves(MachineFunction &F) { - std::vector<MachineInstr *> deadMoves; - for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE; - ++BI) { - MachineBasicBlock *BB = &*BI; - - for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE; - ++II) { - MachineInstr *Instr = &*II; - - if (!isSimpleMove(Instr)) continue; - - MachineOperand dest = Instr->getOperand(0); - assert(dest.isReg() && "dest of move not a register"); - assert(RegInfo->isVirtualRegister(dest.getReg()) && - "dest of move not a virtual register"); - - if (MRI->use_empty(dest.getReg())) { - deadMoves.push_back(Instr); - } - } - } - - for (unsigned i=0, e=deadMoves.size(); i!=e; ++i) - F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i])); - - return deadMoves.size(); -} - -///============================================================================= -///Main function for this pass. 
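// runOnMachineFunction below alternates copyProp and removeDeadMoves until
// no copy is forwarded; a freestanding toy of one such round over a
// made-up three-address form (not the MachineInstr API):

#include <cstdio>
#include <cstring>
#include <vector>

struct Inst { const char *op; int dst, src; };

int main() {
  // %1 = copy %0 ; %2 = add %1
  Inst i0 = {"copy", 1, 0}, i1 = {"add", 2, 1};
  std::vector<Inst> prog;
  prog.push_back(i0);
  prog.push_back(i1);
  // copyProp: a use of a simple move's dest reads its source instead.
  for (unsigned u = 0; u < prog.size(); ++u)
    for (unsigned d = 0; d < prog.size(); ++d)
      if (u != d && !std::strcmp(prog[d].op, "copy") &&
          prog[u].src == prog[d].dst)
        prog[u].src = prog[d].src;
  // removeDeadMoves: a copy whose dest is never read is deleted.
  for (int i = 0; i < (int)prog.size(); ++i) {
    bool used = false;
    for (int j = 0; j < (int)prog.size(); ++j)
      if (j != i && prog[j].src == prog[i].dst)
        used = true;
    if (!std::strcmp(prog[i].op, "copy") && !used) {
      prog.erase(prog.begin() + i);
      --i;
    }
  }
  for (unsigned i = 0; i < prog.size(); ++i)
    std::printf("%%%d = %s %%%d\n", prog[i].dst, prog[i].op, prog[i].src);
  return 0; // prints: %2 = add %0
}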
-///============================================================================= -bool VectorElementize::runOnMachineFunction(MachineFunction &F) { - MRI = &F.getRegInfo(); - - RegInfo = TM.getRegisterInfo(); - InstrInfo = TM.getInstrInfo(); - - VectorToScalarMap.clear(); - - elementize(F); - - if (RemoveRedundantMoves) - while (1) { - if (copyProp(F) == 0) break; - removeDeadMoves(F); - } - - return true; -} - -FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) { - return new VectorElementize(tm); -} - -unsigned VectorElementize::getScalarVersion(unsigned opcode) { - if (opcode == NVPTX::PHI) - return opcode; - if (opcode == NVPTX::IMPLICIT_DEF) - return opcode; - switch(opcode) { - default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td"); - case TargetOpcode::COPY: return TargetOpcode::COPY; - case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr; - case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr; - case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr; - case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr; - case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr; - case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr; - case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr; - case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr; - case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr; - case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr; - case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr; - case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr; - case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr; - case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr; - case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32; - case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32; - case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32; - case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32; - case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32; - case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr; - case NVPTX::F32MADV2: return NVPTX::FMAD32rrr; - case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr; - case NVPTX::F32MADV4: return NVPTX::FMAD32rrr; - case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr; - case NVPTX::F32FMAV2: return NVPTX::FMA32rrr; - case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr; - case NVPTX::F32FMAV4: return NVPTX::FMA32rrr; - case NVPTX::F64FMAV2: return NVPTX::FMA64rrr; - case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32; - case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64; - case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32; - case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32; - case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64; - case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32; - case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32; - case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64; - case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32; - case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32; - case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64; - case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32; - case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32; - case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64; - case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32; - case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32; - case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64; - case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32; - case NVPTX::FVecNEV2F32: return 
NVPTX::FSetNEf32rr_toi32; - case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64; - case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32; - case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32; - case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64; - case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32; - case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32; - case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64; - case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32; - case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32; - case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64; - case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32; - case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32; - case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64; - case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32; - case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32; - case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64; - case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32; - case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32; - case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64; - case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32; - case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32; - case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64; - case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32; - case NVPTX::I16MADV2: return NVPTX::MAD16rrr; - case NVPTX::I16MADV4: return NVPTX::MAD16rrr; - case NVPTX::I32MADV2: return NVPTX::MAD32rrr; - case NVPTX::I32MADV4: return NVPTX::MAD32rrr; - case NVPTX::I64MADV2: return NVPTX::MAD64rrr; - case NVPTX::I8MADV2: return NVPTX::MAD8rrr; - case NVPTX::I8MADV4: return NVPTX::MAD8rrr; - case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr; - case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr; - case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr; - case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr; - case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr; - case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr; - case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr; - case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr; - case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr; - case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr; - case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr; - case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr; - case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr; - case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr; - case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr; - case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr; - case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr; - case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr; - case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr; - case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr; - case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr; - case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr; - case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr; - case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr; - case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr; - case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz; - case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec; - case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz; - case NVPTX::V2F32Div: return NVPTX::FDIV32rr; - case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr; - case NVPTX::V2F64Div: return NVPTX::FDIV64rr; - case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr; - case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr; - case NVPTX::V2I32_Select: return 
NVPTX::SELECTi32rr; - case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr; - case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr; - case NVPTX::V2f32Extract: return NVPTX::FMOV32rr; - case NVPTX::V2f32Insert: return NVPTX::FMOV32rr; - case NVPTX::V2f32Mov: return NVPTX::FMOV32rr; - case NVPTX::V2f64Extract: return NVPTX::FMOV64rr; - case NVPTX::V2f64Insert: return NVPTX::FMOV64rr; - case NVPTX::V2f64Mov: return NVPTX::FMOV64rr; - case NVPTX::V2i16Extract: return NVPTX::IMOV16rr; - case NVPTX::V2i16Insert: return NVPTX::IMOV16rr; - case NVPTX::V2i16Mov: return NVPTX::IMOV16rr; - case NVPTX::V2i32Extract: return NVPTX::IMOV32rr; - case NVPTX::V2i32Insert: return NVPTX::IMOV32rr; - case NVPTX::V2i32Mov: return NVPTX::IMOV32rr; - case NVPTX::V2i64Extract: return NVPTX::IMOV64rr; - case NVPTX::V2i64Insert: return NVPTX::IMOV64rr; - case NVPTX::V2i64Mov: return NVPTX::IMOV64rr; - case NVPTX::V2i8Extract: return NVPTX::IMOV8rr; - case NVPTX::V2i8Insert: return NVPTX::IMOV8rr; - case NVPTX::V2i8Mov: return NVPTX::IMOV8rr; - case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz; - case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec; - case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz; - case NVPTX::V4F32Div: return NVPTX::FDIV32rr; - case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr; - case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr; - case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr; - case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr; - case NVPTX::V4f32Extract: return NVPTX::FMOV32rr; - case NVPTX::V4f32Insert: return NVPTX::FMOV32rr; - case NVPTX::V4f32Mov: return NVPTX::FMOV32rr; - case NVPTX::V4i16Extract: return NVPTX::IMOV16rr; - case NVPTX::V4i16Insert: return NVPTX::IMOV16rr; - case NVPTX::V4i16Mov: return NVPTX::IMOV16rr; - case NVPTX::V4i32Extract: return NVPTX::IMOV32rr; - case NVPTX::V4i32Insert: return NVPTX::IMOV32rr; - case NVPTX::V4i32Mov: return NVPTX::IMOV32rr; - case NVPTX::V4i8Extract: return NVPTX::IMOV8rr; - case NVPTX::V4i8Insert: return NVPTX::IMOV8rr; - case NVPTX::V4i8Mov: return NVPTX::IMOV8rr; - case NVPTX::VAddV2I16: return NVPTX::ADDi16rr; - case NVPTX::VAddV2I32: return NVPTX::ADDi32rr; - case NVPTX::VAddV2I64: return NVPTX::ADDi64rr; - case NVPTX::VAddV2I8: return NVPTX::ADDi8rr; - case NVPTX::VAddV4I16: return NVPTX::ADDi16rr; - case NVPTX::VAddV4I32: return NVPTX::ADDi32rr; - case NVPTX::VAddV4I8: return NVPTX::ADDi8rr; - case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr; - case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz; - case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr; - case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr; - case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz; - case NVPTX::VAndV2I16: return NVPTX::ANDb16rr; - case NVPTX::VAndV2I32: return NVPTX::ANDb32rr; - case NVPTX::VAndV2I64: return NVPTX::ANDb64rr; - case NVPTX::VAndV2I8: return NVPTX::ANDb8rr; - case NVPTX::VAndV4I16: return NVPTX::ANDb16rr; - case NVPTX::VAndV4I32: return NVPTX::ANDb32rr; - case NVPTX::VAndV4I8: return NVPTX::ANDb8rr; - case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz; - case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr; - case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr; - case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz; - case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr; - case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr; - case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr; - case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr; - case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr; - case NVPTX::VMultHSV4I16: return 
NVPTX::MULTHSi16rr; - case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr; - case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr; - case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr; - case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr; - case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr; - case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr; - case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr; - case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr; - case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr; - case NVPTX::VMultV2I16: return NVPTX::MULTi16rr; - case NVPTX::VMultV2I32: return NVPTX::MULTi32rr; - case NVPTX::VMultV2I64: return NVPTX::MULTi64rr; - case NVPTX::VMultV2I8: return NVPTX::MULTi8rr; - case NVPTX::VMultV4I16: return NVPTX::MULTi16rr; - case NVPTX::VMultV4I32: return NVPTX::MULTi32rr; - case NVPTX::VMultV4I8: return NVPTX::MULTi8rr; - case NVPTX::VNegV2I16: return NVPTX::INEG16; - case NVPTX::VNegV2I32: return NVPTX::INEG32; - case NVPTX::VNegV2I64: return NVPTX::INEG64; - case NVPTX::VNegV2I8: return NVPTX::INEG8; - case NVPTX::VNegV4I16: return NVPTX::INEG16; - case NVPTX::VNegV4I32: return NVPTX::INEG32; - case NVPTX::VNegV4I8: return NVPTX::INEG8; - case NVPTX::VNegv2f32: return NVPTX::FNEGf32; - case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz; - case NVPTX::VNegv2f64: return NVPTX::FNEGf64; - case NVPTX::VNegv4f32: return NVPTX::FNEGf32; - case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz; - case NVPTX::VNotV2I16: return NVPTX::NOT16; - case NVPTX::VNotV2I32: return NVPTX::NOT32; - case NVPTX::VNotV2I64: return NVPTX::NOT64; - case NVPTX::VNotV2I8: return NVPTX::NOT8; - case NVPTX::VNotV4I16: return NVPTX::NOT16; - case NVPTX::VNotV4I32: return NVPTX::NOT32; - case NVPTX::VNotV4I8: return NVPTX::NOT8; - case NVPTX::VOrV2I16: return NVPTX::ORb16rr; - case NVPTX::VOrV2I32: return NVPTX::ORb32rr; - case NVPTX::VOrV2I64: return NVPTX::ORb64rr; - case NVPTX::VOrV2I8: return NVPTX::ORb8rr; - case NVPTX::VOrV4I16: return NVPTX::ORb16rr; - case NVPTX::VOrV4I32: return NVPTX::ORb32rr; - case NVPTX::VOrV4I8: return NVPTX::ORb8rr; - case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr; - case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr; - case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr; - case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr; - case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr; - case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr; - case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr; - case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr; - case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr; - case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr; - case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr; - case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr; - case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr; - case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr; - case NVPTX::VSubV2I16: return NVPTX::SUBi16rr; - case NVPTX::VSubV2I32: return NVPTX::SUBi32rr; - case NVPTX::VSubV2I64: return NVPTX::SUBi64rr; - case NVPTX::VSubV2I8: return NVPTX::SUBi8rr; - case NVPTX::VSubV4I16: return NVPTX::SUBi16rr; - case NVPTX::VSubV4I32: return NVPTX::SUBi32rr; - case NVPTX::VSubV4I8: return NVPTX::SUBi8rr; - case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz; - case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr; - case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr; - case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz; - case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr; - case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr; - case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr; - case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr; 
- case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr; - case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr; - case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr; - case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr; - case NVPTX::VURemV2I16: return NVPTX::UREMi16rr; - case NVPTX::VURemV2I32: return NVPTX::UREMi32rr; - case NVPTX::VURemV2I64: return NVPTX::UREMi64rr; - case NVPTX::VURemV2I8: return NVPTX::UREMi8rr; - case NVPTX::VURemV4I16: return NVPTX::UREMi16rr; - case NVPTX::VURemV4I32: return NVPTX::UREMi32rr; - case NVPTX::VURemV4I8: return NVPTX::UREMi8rr; - case NVPTX::VXorV2I16: return NVPTX::XORb16rr; - case NVPTX::VXorV2I32: return NVPTX::XORb32rr; - case NVPTX::VXorV2I64: return NVPTX::XORb64rr; - case NVPTX::VXorV2I8: return NVPTX::XORb8rr; - case NVPTX::VXorV4I16: return NVPTX::XORb16rr; - case NVPTX::VXorV4I32: return NVPTX::XORb32rr; - case NVPTX::VXorV4I8: return NVPTX::XORb8rr; - case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16; - case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32; - case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64; - case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8; - case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16; - case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32; - case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8; - case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16; - case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32; - case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64; - case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8; - case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16; - case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32; - case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8; - case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16; - case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32; - case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64; - case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8; - case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16; - case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32; - case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8; - case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16; - case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32; - case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64; - case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8; - case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16; - case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32; - case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8; - case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16; - case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32; - case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64; - case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8; - case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16; - case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32; - case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8; - case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16; - case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32; - case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64; - case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8; - case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16; - case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32; - case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8; - case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr; - case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr; - case 
NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr; - case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr; - case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr; - case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr; - case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr; - case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr; - case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr; - case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr; - case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16; - case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32; - case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64; - case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8; - case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16; - case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32; - case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8; - case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16; - case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32; - case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64; - case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8; - case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16; - case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32; - case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8; - case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16; - case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32; - case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64; - case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8; - case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16; - case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32; - case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8; - case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16; - case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32; - case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64; - case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8; - case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16; - case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32; - case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8; - case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16; - case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32; - case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64; - case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8; - case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16; - case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32; - case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8; - case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16; - case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32; - case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64; - case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8; - case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16; - case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32; - case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8; - case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; - case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; - case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; - case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; - case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; - case 
NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
- case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
- case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
-
- case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32;
- case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16;
- case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8;
- case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64;
- case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32;
- case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16;
- case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8;
- case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32;
- case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32;
- case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64;
- case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32;
- case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16;
- case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8;
- case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64;
- case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32;
- case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16;
- case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8;
- case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32;
- case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32;
- case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64;
- case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32;
- case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16;
- case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8;
- case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64;
- case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32;
- case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16;
- case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8;
- case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32;
- case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32;
- case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64;
- case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8;
- case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16;
- case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8;
- case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16;
- case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32;
- case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32;
-
- case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar;
- case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg;
- case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari;
- case NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi;
- case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar;
- case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg;
- case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari;
- case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi;
-
- case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar;
- case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg;
- case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari;
- case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi;
- case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar;
- case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg;
- case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari;
- case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi;
-
- case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar;
- case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg;
- case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari;
- case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi;
- case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar;
- case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg;
- case NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari;
- case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi;
-
- case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar;
- case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg;
- case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari;
- case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi;
- case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar;
- case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg;
- case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari;
- case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi;
-
- case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar;
- case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg;
- case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari;
- case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi;
- case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar;
- case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg;
- case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari;
- case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi;
-
- case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar;
- case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg;
- case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari;
- case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi;
- case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar;
- case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg;
- case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari;
- case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi;
-
- case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar;
- case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg;
- case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari;
- case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi;
- case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar;
- case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg;
- case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari;
- case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi;
-
- case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar;
- case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg;
- case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari;
- case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi;
- case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar;
- case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg;
- case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari;
- case NVPTX::ST_v4i32_asi: return NVPTX::STV_i32_v4_asi;
-
- case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar;
- case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg;
- case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari;
- case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi;
- case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar;
- case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg;
- case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari;
- case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi;
-
- case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar;
- case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg;
- case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari;
- case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi;
- case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar;
- case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg;
- case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari;
- case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi;
- }
- return 0;
-}
diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h index a7347efd7850..45cc0b8b67f2 100644 --- a/lib/Target/NVPTX/cl_common_defines.h +++ b/lib/Target/NVPTX/cl_common_defines.h @@ -24,22 +24,21 @@ enum {
CLK_LUMINANCE = 0x10B9
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
- ,
+ ,
CLK_Rx = 0x10BA,
CLK_RGx = 0x10BB,
CLK_RGBx = 0x10BC
#endif
};
-
typedef enum clk_channel_type {
// valid formats for float return types
- CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8
- CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
- CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
- CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
- CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
- CLK_FLOAT = 0x10DE, // four channel RGBA float
+ CLK_SNORM_INT8 = 0x10D0,  // four channel RGBA unorm8
+ CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
+ CLK_UNORM_INT8 = 0x10D2,  // four channel RGBA unorm8
+ CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
+ CLK_HALF_FLOAT = 0x10DD,  // four channel RGBA half
+ CLK_FLOAT = 0x10DE,       // four channel RGBA float
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
CLK_UNORM_SHORT_565 = 0x10D4,
@@ -48,7 +47,7 @@ typedef enum clk_channel_type {
#endif
// valid only for integer return types
- CLK_SIGNED_INT8 = 0x10D7,
+ CLK_SIGNED_INT8 = 0x10D7,
CLK_SIGNED_INT16 = 0x10D8,
CLK_SIGNED_INT32 = 0x10D9,
CLK_UNSIGNED_INT8 = 0x10DA,
@@ -56,70 +55,68 @@
CLK_UNSIGNED_INT32 = 0x10DC,
// CI SPI for CPU
- __CLK_UNORM_INT8888 , // four channel ARGB unorm8
- __CLK_UNORM_INT8888R, // four channel BGRA unorm8
+ __CLK_UNORM_INT8888,  // four channel ARGB unorm8
+ __CLK_UNORM_INT8888R, // four channel BGRA unorm8
__CLK_VALID_IMAGE_TYPE_COUNT,
__CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
- __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
- // represent any image type
- __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1
-}clk_channel_type;
+ __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
+ // represent any image type
+ __CLK_VALID_IMAGE_TYPE_MASK = (1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS) - 1
+} clk_channel_type;

typedef enum clk_sampler_type {
- __CLK_ADDRESS_BASE = 0,
- CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
- CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
+ __CLK_ADDRESS_BASE = 0,
+ CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
+ CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
- CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
+ CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
#endif
- __CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP |
- CLK_ADDRESS_CLAMP_TO_EDGE |
- CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
- __CLK_ADDRESS_BITS = 3, // number of bits required to
- // represent address info
-
- __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
- CLK_NORMALIZED_COORDS_FALSE = 0,
- CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
- __CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE |
- CLK_NORMALIZED_COORDS_TRUE,
- __CLK_NORMALIZED_BITS = 1, // number of bits required to
- // represent normalization
-
- __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE +
- __CLK_NORMALIZED_BITS,
- CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
- CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
- CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
- __CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR |
- CLK_FILTER_ANISOTROPIC,
- __CLK_FILTER_BITS = 2, // number of bits required to
- // represent address info
-
- __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
- CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
- CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
- CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
- __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR |
- CLK_MIP_ANISOTROPIC,
- __CLK_MIP_BITS = 2,
-
- __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
- __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
- __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
-
- __CLK_ANISOTROPIC_RATIO_BITS = 5,
- __CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >>
- (__CLK_ANISOTROPIC_RATIO_BITS-1)
+ __CLK_ADDRESS_MASK =
+ CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | CLK_ADDRESS_CLAMP_TO_EDGE |
+ CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
+ __CLK_ADDRESS_BITS = 3, // number of bits required to
+ // represent address info
+
+ __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
+ CLK_NORMALIZED_COORDS_FALSE = 0,
+ CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
+ __CLK_NORMALIZED_MASK =
+ CLK_NORMALIZED_COORDS_FALSE | CLK_NORMALIZED_COORDS_TRUE,
+ __CLK_NORMALIZED_BITS = 1, // number of bits required to
+ // represent normalization
+
+ __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + __CLK_NORMALIZED_BITS,
+ CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
+ CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
+ CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
+ __CLK_FILTER_MASK =
+ CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | CLK_FILTER_ANISOTROPIC,
+ __CLK_FILTER_BITS = 2, // number of bits required to
+ // represent address info
+
+ __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
+ CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
+ CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
+ CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
+ __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | CLK_MIP_ANISOTROPIC,
+ __CLK_MIP_BITS = 2,
+
+ __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
+ __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
+ __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
+
+ __CLK_ANISOTROPIC_RATIO_BITS = 5,
+ __CLK_ANISOTROPIC_RATIO_MASK =
+ (int) 0x80000000 >> (__CLK_ANISOTROPIC_RATIO_BITS - 1)
} clk_sampler_type;

// Memory synchronization
-#define CLK_LOCAL_MEM_FENCE (1 << 0)
-#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+#define CLK_LOCAL_MEM_FENCE (1 << 0)
+#define CLK_GLOBAL_MEM_FENCE (1 << 1)

#endif // __CL_COMMON_DEFINES_H__
diff --git a/lib/Target/NVPTX/gen-register-defs.py b/lib/Target/NVPTX/gen-register-defs.py deleted file mode 100644 index ed0666823124..000000000000 --- a/lib/Target/NVPTX/gen-register-defs.py +++ /dev/null @@ -1,202 +0,0 @@
-#!/usr/bin/env python
-
-num_regs = 396
-
-outFile = open('NVPTXRegisterInfo.td', 'w')
-
-outFile.write('''
-//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the PTX register file
-//===----------------------------------------------------------------------===//
-
-class NVPTXReg<string n> : Register<n> {
- let Namespace = "NVPTX";
-}
-
-class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
- : RegisterClass <"NVPTX", regTypes, alignment, regList>;
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-
-// Special Registers used as stack pointer
-def VRFrame : NVPTXReg<"%SP">;
-def VRFrameLocal : NVPTXReg<"%SPL">;
-
-// Special Registers used as the stack
-def VRDepot : NVPTXReg<"%Depot">;
-''')
-
-# Predicates
-outFile.write('''
-//===--- Predicate --------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i))
-
-# Int8
-outFile.write('''
-//===--- 8-bit ------------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i))
-
-# Int16
-outFile.write('''
-//===--- 16-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i))
-
-# Int32
-outFile.write('''
-//===--- 32-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i))
-
-# Int64
-outFile.write('''
-//===--- 64-bit -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i))
-
-# F32
-outFile.write('''
-//===--- 32-bit float -----------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i))
-
-# F64
-outFile.write('''
-//===--- 64-bit float -----------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i))
-
-# Vector registers
-outFile.write('''
-//===--- Vector -----------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i))
-
-for i in range(0, num_regs):
- outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i))
-
-# Argument registers
-outFile.write('''
-//===--- Arguments --------------------------------------------------------===//
-''')
-for i in range(0, num_regs):
- outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i))
-for i in range(0, num_regs):
- outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i))
-
-outFile.write('''
-//===----------------------------------------------------------------------===//
-// Register classes
-//===----------------------------------------------------------------------===//
-''')
-
-outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1))
-outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1))
-
-outFile.write('''
-// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
-''')
-
-outFile.write('''
-class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
- NVPTXRegClass sClass,
- int e,
- string n>
- : NVPTXRegClass<regTypes, alignment, regList>
-{
- NVPTXRegClass scalarClass=sClass;
- int elems=e;
- string name=n;
-}
-''')
-
-
-outFile.write('def V2F32Regs\n : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
-outFile.write('def V4F32Regs\n : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))
-
-outFile.write('def V2I32Regs\n : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
-outFile.write('def V4I32Regs\n : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))
-
-outFile.write('def V2F64Regs\n : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
-outFile.write('def V2I64Regs\n : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))
-
-outFile.write('def V2I16Regs\n : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
-outFile.write('def V4I16Regs\n : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))
-
-outFile.write('def V2I8Regs\n : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
-outFile.write('def V4I8Regs\n : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))
-
-outFile.close()
-
-
-outFile = open('NVPTXNumRegisters.h', 'w')
-outFile.write('''
-//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef NVPTX_NUM_REGISTERS_H
-#define NVPTX_NUM_REGISTERS_H
-
-namespace llvm {
-
-const unsigned NVPTXNumRegisters = %d;
-
-}
-
-#endif
-''' % num_regs)
-
-outFile.close()