author     Dimitry Andric <dim@FreeBSD.org>    2017-01-06 20:13:35 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-01-06 20:13:35 +0000
commit     6694ed095d6b27a2c92ec4fd63664fcd88a05749 (patch)
tree       0633c29bd8350e306f3a24a30f3f6045efd35420 /lib/CodeGen
parent     d5dc75c5cf109efe52b1da32ec44a667389a0f0a (diff)
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/BackendUtil.cpp                        30
-rw-r--r--  lib/CodeGen/CGBuiltin.cpp                          84
-rw-r--r--  lib/CodeGen/CGCall.cpp                              6
-rw-r--r--  lib/CodeGen/CGExpr.cpp                             11
-rw-r--r--  lib/CodeGen/CGOpenMPRuntime.cpp                    23
-rw-r--r--  lib/CodeGen/CGOpenMPRuntime.h                      36
-rw-r--r--  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp              270
-rw-r--r--  lib/CodeGen/CGOpenMPRuntimeNVPTX.h                 51
-rw-r--r--  lib/CodeGen/CodeGenAction.cpp                      16
-rw-r--r--  lib/CodeGen/CodeGenFunction.h                       2
-rw-r--r--  lib/CodeGen/ObjectFilePCHContainerOperations.cpp    9
-rw-r--r--  lib/CodeGen/TargetInfo.cpp                        206
12 files changed, 505 insertions, 239 deletions
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 164e52d7de27..ed09f3a45566 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -14,6 +14,7 @@
#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Frontend/Utils.h"
+#include "clang/Lex/HeaderSearchOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
@@ -32,6 +33,7 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/Verifier.h"
#include "llvm/LTO/LTOBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/Passes/PassBuilder.h"
@@ -61,6 +63,7 @@ namespace {
class EmitAssemblyHelper {
DiagnosticsEngine &Diags;
+ const HeaderSearchOptions &HSOpts;
const CodeGenOptions &CodeGenOpts;
const clang::TargetOptions &TargetOpts;
const LangOptions &LangOpts;
@@ -100,11 +103,14 @@ private:
raw_pwrite_stream &OS);
public:
- EmitAssemblyHelper(DiagnosticsEngine &_Diags, const CodeGenOptions &CGOpts,
+ EmitAssemblyHelper(DiagnosticsEngine &_Diags,
+ const HeaderSearchOptions &HeaderSearchOpts,
+ const CodeGenOptions &CGOpts,
const clang::TargetOptions &TOpts,
const LangOptions &LOpts, Module *M)
- : Diags(_Diags), CodeGenOpts(CGOpts), TargetOpts(TOpts), LangOpts(LOpts),
- TheModule(M), CodeGenerationTime("codegen", "Code Generation Time") {}
+ : Diags(_Diags), HSOpts(HeaderSearchOpts), CodeGenOpts(CGOpts),
+ TargetOpts(TOpts), LangOpts(LOpts), TheModule(M),
+ CodeGenerationTime("codegen", "Code Generation Time") {}
~EmitAssemblyHelper() {
if (CodeGenOpts.DisableFree)
@@ -584,12 +590,18 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack;
Options.MCOptions.MCIncrementalLinkerCompatible =
CodeGenOpts.IncrementalLinkerCompatible;
- Options.MCOptions.MCPIECopyRelocations =
- CodeGenOpts.PIECopyRelocations;
+ Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations;
Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings;
Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose;
Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments;
Options.MCOptions.ABIName = TargetOpts.ABI;
+ for (const auto &Entry : HSOpts.UserEntries)
+ if (!Entry.IsFramework &&
+ (Entry.Group == frontend::IncludeDirGroup::Quoted ||
+ Entry.Group == frontend::IncludeDirGroup::Angled ||
+ Entry.Group == frontend::IncludeDirGroup::System))
+ Options.MCOptions.IASSearchPaths.push_back(
+ Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path);
TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr,
Options, RM, CM, OptLevel));
@@ -929,17 +941,19 @@ static void runThinLTOBackend(const CodeGenOptions &CGOpts, Module *M,
}
void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
+ const HeaderSearchOptions &HeaderOpts,
const CodeGenOptions &CGOpts,
const clang::TargetOptions &TOpts,
- const LangOptions &LOpts, const llvm::DataLayout &TDesc,
- Module *M, BackendAction Action,
+ const LangOptions &LOpts,
+ const llvm::DataLayout &TDesc, Module *M,
+ BackendAction Action,
std::unique_ptr<raw_pwrite_stream> OS) {
if (!CGOpts.ThinLTOIndexFile.empty()) {
runThinLTOBackend(CGOpts, M, std::move(OS));
return;
}
- EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M);
+ EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M);
if (CGOpts.ExperimentalNewPassManager)
AsmHelper.EmitAssemblyWithNewPassManager(Action, std::move(OS));
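
The loop added to CreateTargetMachine above is what lets the integrated assembler resolve .include directives against the driver's -I/-iquote/-isystem paths. A minimal sketch of the path collection, using simplified stand-in types for HeaderSearchOptions and its user entries (the real declarations live in clang/Lex/HeaderSearchOptions.h):

    #include <string>
    #include <vector>

    // Simplified stand-ins for clang::HeaderSearchOptions and the
    // frontend::IncludeDirGroup values tested in the hunk above.
    enum class IncludeDirGroup { Quoted, Angled, System, Framework };

    struct Entry {
      std::string Path;
      IncludeDirGroup Group;
      bool IsFramework;
      bool IgnoreSysRoot;
    };

    // Keep the non-framework quoted/angled/system entries, prefixing the
    // sysroot unless the entry opts out -- the same filter that fills
    // Options.MCOptions.IASSearchPaths.
    std::vector<std::string>
    collectIASSearchPaths(const std::vector<Entry> &UserEntries,
                          const std::string &Sysroot) {
      std::vector<std::string> Paths;
      for (const auto &E : UserEntries)
        if (!E.IsFramework && (E.Group == IncludeDirGroup::Quoted ||
                               E.Group == IncludeDirGroup::Angled ||
                               E.Group == IncludeDirGroup::System))
          Paths.push_back(E.IgnoreSysRoot ? E.Path : Sysroot + E.Path);
      return Paths;
    }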
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 43ca74761fbd..4d34b3e9222f 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -35,6 +35,11 @@ using namespace clang;
using namespace CodeGen;
using namespace llvm;
+static int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
+ return std::min(High, std::max(Low, Value));
+}
+
/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
@@ -8191,6 +8196,85 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
llvm_unreachable("Unknown FMA operation");
return nullptr; // Suppress no-return warning
}
+
+ case PPC::BI__builtin_vsx_insertword: {
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
+
+ // Third argument is a compile-time constant int. It must be clamped to
+ // the range [0, 12].
+ ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+ assert(ArgCI &&
+ "Third arg to xxinsertw intrinsic must be constant integer");
+ const int64_t MaxIndex = 12;
+ int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
+
+ // The builtin semantics don't exactly match the xxinsertw instruction's
+ // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
+ // word from the first argument, and inserts it in the second argument. The
+ // instruction extracts the word from its second input register and inserts
+ // it into its first input register, so swap the first and second arguments.
+ std::swap(Ops[0], Ops[1]);
+
+ // Need to cast the second argument from a vector of unsigned int to a
+ // vector of long long.
+ Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
+
+ if (getTarget().isLittleEndian()) {
+ // Create a shuffle mask of (1, 0)
+ Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
+ ConstantInt::get(Int32Ty, 0)
+ };
+ Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+
+ // Reverse the double words in the vector we will extract from.
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
+ Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
+
+ // Reverse the index.
+ Index = MaxIndex - Index;
+ }
+
+ // Intrinsic expects the first arg to be a vector of int.
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
+ Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
+ return Builder.CreateCall(F, Ops);
+ }
+
+ case PPC::BI__builtin_vsx_extractuword: {
+ llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
+
+ // Intrinsic expects the first argument to be a vector of doublewords.
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
+
+ // The second argument is a compile-time constant int that needs to
+ // be clamped to the range [0, 12].
+ ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
+ assert(ArgCI &&
+ "Second Arg to xxextractuw intrinsic must be a constant integer!");
+ const int64_t MaxIndex = 12;
+ int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
+
+ if (getTarget().isLittleEndian()) {
+ // Reverse the index.
+ Index = MaxIndex - Index;
+ Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
+
+ // Emit the call, then reverse the double words of the results vector.
+ Value *Call = Builder.CreateCall(F, Ops);
+
+ // Create a shuffle mask of (1, 0)
+ Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
+ ConstantInt::get(Int32Ty, 0)
+ };
+ Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+
+ Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
+ return ShuffleCall;
+ } else {
+ Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
+ return Builder.CreateCall(F, Ops);
+ }
+ }
}
}
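
Both new builtins clamp the compile-time word index to [0, 12] and, on little-endian targets, mirror it, because the doublewords of the vector are swapped before (and, for the extract, after) the instruction. A standalone sketch of that index computation; clampIndex mirrors the static clamp helper added at the top of this file:

    #include <algorithm>
    #include <cstdint>

    // Clamp the compile-time word index to the range the instruction accepts.
    int64_t clampIndex(int64_t Value) {
      return std::min<int64_t>(12, std::max<int64_t>(0, Value));
    }

    // On little-endian targets the doublewords are reversed around the
    // xxinsertw/xxextractuw call, so the byte index is mirrored as well.
    int64_t effectiveIndex(int64_t Value, bool IsLittleEndian) {
      const int64_t MaxIndex = 12;
      int64_t Index = clampIndex(Value);
      return IsLittleEndian ? MaxIndex - Index : Index;
    }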
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 9b96a59aec38..c7c61e0c8ecb 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -393,15 +393,13 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
// When declaring a function without a prototype, always use a
// non-variadic type.
- if (isa<FunctionNoProtoType>(FTy)) {
- CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>();
+ if (CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>()) {
return arrangeLLVMFunctionInfo(
noProto->getReturnType(), /*instanceMethod=*/false,
/*chainCall=*/false, None, noProto->getExtInfo(), {}, RequiredArgs::All);
}
- assert(isa<FunctionProtoType>(FTy));
- return arrangeFreeFunctionType(FTy.getAs<FunctionProtoType>(), FD);
+ return arrangeFreeFunctionType(FTy.castAs<FunctionProtoType>(), FD);
}
/// Arrange the argument and result information for the declaration or
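
The rewrite works because CanQual<T>::getAs returns a null, boolean-false CanQual when FTy is not a FunctionNoProtoType, so the separate isa<> test folds into the if condition, and castAs<> covers the only remaining case. A minimal sketch of the same null-on-mismatch idiom, using plain dynamic_cast on hypothetical stand-in types:

    // Hypothetical miniature of the FunctionType hierarchy, for illustration.
    struct FunctionType { virtual ~FunctionType() = default; };
    struct FunctionNoProtoType : FunctionType {};
    struct FunctionProtoType : FunctionType {};

    int arrange(const FunctionType &FTy) {
      // The cast yields null on mismatch, so it doubles as the type test.
      if (auto *NoProto = dynamic_cast<const FunctionNoProtoType *>(&FTy)) {
        (void)NoProto;  // would feed arrangeLLVMFunctionInfo(...)
        return 0;
      }
      // Only the prototype case is left; a checked cast would assert here.
      return 1;         // would feed arrangeFreeFunctionType(...)
    }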
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 183201c78e36..e5e34a5f3ed6 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -604,12 +604,13 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
}
if (Checks.size() > 0) {
+ // Make sure we're not losing information. Alignment needs to be a power
+ // of 2.
+ assert(!AlignVal || (uint64_t)1 << llvm::Log2_64(AlignVal) == AlignVal);
llvm::Constant *StaticData[] = {
- EmitCheckSourceLocation(Loc),
- EmitCheckTypeDescriptor(Ty),
- llvm::ConstantInt::get(SizeTy, AlignVal),
- llvm::ConstantInt::get(Int8Ty, TCK)
- };
+ EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(Ty),
+ llvm::ConstantInt::get(Int8Ty, AlignVal ? llvm::Log2_64(AlignVal) : 1),
+ llvm::ConstantInt::get(Int8Ty, TCK)};
EmitCheck(Checks, SanitizerHandler::TypeMismatch, StaticData, Ptr);
}
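
The type-mismatch check now emits the alignment as its log2 in an i8 instead of the raw value in a size_t; the TypeMismatch entry in CodeGenFunction.h further below is bumped from 0 to 1 to version this change in the handler data. A sketch of the encoding, assuming the runtime recovers the alignment as 1 << Encoded:

    #include <cassert>
    #include <cstdint>

    // Encode a nonzero power-of-two alignment as its log2 so it fits in an
    // i8; this mirrors llvm::Log2_64 for the nonzero case.
    uint8_t encodeAlignment(uint64_t AlignVal) {
      assert(AlignVal && (AlignVal & (AlignVal - 1)) == 0 &&
             "alignment must be a nonzero power of 2");
      uint8_t Log2 = 0;
      while (AlignVal >>= 1)
        ++Log2;
      return Log2;
    }

    // Assumed handler-side decoding: shift the encoded exponent back up.
    uint64_t decodeAlignment(uint8_t Encoded) {
      return uint64_t(1) << Encoded;
    }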
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 0624d86b564a..27af344fae87 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2701,14 +2701,16 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
"only required for the device "
"code generation.");
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
- OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
+ OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
+ /*Flags=*/0);
++OffloadingEntriesNum;
}
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
StringRef ParentName, unsigned LineNum,
- llvm::Constant *Addr, llvm::Constant *ID) {
+ llvm::Constant *Addr, llvm::Constant *ID,
+ int32_t Flags) {
// If we are emitting code for a target, the entry is already initialized,
// only has to be registered.
if (CGM.getLangOpts().OpenMPIsDevice) {
@@ -2719,9 +2721,10 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
assert(Entry.isValid() && "Entry not initialized!");
Entry.setAddress(Addr);
Entry.setID(ID);
+ Entry.setFlags(Flags);
return;
} else {
- OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
+ OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags);
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
}
}
@@ -2888,7 +2891,8 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
}
void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
- llvm::Constant *Addr, uint64_t Size) {
+ llvm::Constant *Addr, uint64_t Size,
+ int32_t Flags) {
StringRef Name = Addr->getName();
auto *TgtOffloadEntryType = cast<llvm::StructType>(
CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
@@ -2918,6 +2922,8 @@ void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
EntryInit.add(AddrPtr);
EntryInit.add(StrPtr);
EntryInit.addInt(CGM.SizeTy, Size);
+ EntryInit.addInt(CGM.Int32Ty, Flags);
+ EntryInit.addInt(CGM.Int32Ty, 0);
llvm::GlobalVariable *Entry =
EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
Align,
@@ -3090,6 +3096,8 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
// // (function or global)
// char *name; // Name of the function or global.
// size_t size; // Size of the entry info (0 if it is a function).
+ // int32_t flags; // Flags associated with the entry, e.g. 'link'.
+ // int32_t reserved; // Reserved, to use by the runtime library.
// };
if (TgtOffloadEntryQTy.isNull()) {
ASTContext &C = CGM.getContext();
@@ -3098,6 +3106,10 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
addFieldToRecordDecl(C, RD, C.VoidPtrTy);
addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
addFieldToRecordDecl(C, RD, C.getSizeType());
+ addFieldToRecordDecl(
+ C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
+ addFieldToRecordDecl(
+ C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
RD->completeDefinition();
TgtOffloadEntryQTy = C.getRecordType(RD);
}
@@ -4852,7 +4864,8 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
// Register the information for the entry associated with this target region.
OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
- DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
+ DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
+ /*Flags=*/0);
}
/// discard all CompoundStmts intervening between two constructs
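
For reference, a sketch of the host-side record that the entry initializer above populates, with the layout mirrored from the field-by-field comment in getTgtOffloadEntryQTy(); an illustration, not the runtime library's authoritative header:

    #include <cstddef>
    #include <cstdint>

    // Field order matches the addFieldToRecordDecl calls above.
    struct __tgt_offload_entry {
      void *addr;        // Pointer to the offload entry (function or global).
      char *name;        // Name of the function or global.
      std::size_t size;  // Size of the entry info (0 if it is a function).
      int32_t flags;     // Flags associated with the entry, e.g. 'link'.
      int32_t reserved;  // Reserved, to be used by the runtime library.
    };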
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 9057e5ec4c14..9a784dff0ae8 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -110,9 +110,9 @@ protected:
CodeGenModule &CGM;
/// \brief Creates offloading entry for the provided entry ID \a ID,
- /// address \a Addr and size \a Size.
+ /// address \a Addr, size \a Size, and flags \a Flags.
virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
- uint64_t Size);
+ uint64_t Size, int32_t Flags = 0);
/// \brief Helper to emit outlined function for 'target' directive.
/// \param D Directive to emit.
@@ -245,10 +245,10 @@ private:
unsigned OffloadingEntriesNum;
public:
- /// \brief Base class of the entries info.
+ /// Base class of the entries info.
class OffloadEntryInfo {
public:
- /// \brief Kind of a given entry. Currently, only target regions are
+ /// Kind of a given entry. Currently, only target regions are
/// supported.
enum OffloadingEntryInfoKinds : unsigned {
// Entry is a target region.
@@ -257,17 +257,24 @@ private:
OFFLOAD_ENTRY_INFO_INVALID = ~0u
};
- OffloadEntryInfo() : Order(~0u), Kind(OFFLOAD_ENTRY_INFO_INVALID) {}
- explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order)
- : Order(Order), Kind(Kind) {}
+ OffloadEntryInfo()
+ : Flags(0), Order(~0u), Kind(OFFLOAD_ENTRY_INFO_INVALID) {}
+ explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
+ int32_t Flags)
+ : Flags(Flags), Order(Order), Kind(Kind) {}
bool isValid() const { return Order != ~0u; }
unsigned getOrder() const { return Order; }
OffloadingEntryInfoKinds getKind() const { return Kind; }
+ int32_t getFlags() const { return Flags; }
+ void setFlags(int32_t NewFlags) { Flags = NewFlags; }
static bool classof(const OffloadEntryInfo *Info) { return true; }
- protected:
- // \brief Order this entry was emitted.
+ private:
+ /// Flags associated with the device global.
+ int32_t Flags;
+
+ /// Order this entry was emitted.
unsigned Order;
OffloadingEntryInfoKinds Kind;
@@ -292,12 +299,13 @@ private:
public:
OffloadEntryInfoTargetRegion()
- : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, ~0u),
+ : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, ~0u,
+ /*Flags=*/0),
Addr(nullptr), ID(nullptr) {}
explicit OffloadEntryInfoTargetRegion(unsigned Order,
llvm::Constant *Addr,
- llvm::Constant *ID)
- : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, Order),
+ llvm::Constant *ID, int32_t Flags)
+ : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, Order, Flags),
Addr(Addr), ID(ID) {}
llvm::Constant *getAddress() const { return Addr; }
@@ -321,8 +329,8 @@ private:
/// \brief Register target region entry.
void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
StringRef ParentName, unsigned LineNum,
- llvm::Constant *Addr,
- llvm::Constant *ID);
+ llvm::Constant *Addr, llvm::Constant *ID,
+ int32_t Flags);
/// \brief Return true if a target region entry with the provided
/// information exists.
bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index fe0e2acdfdbf..bc1458b1c203 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -22,14 +22,10 @@ using namespace CodeGen;
namespace {
enum OpenMPRTLFunctionNVPTX {
- /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle,
- /// kmp_int32 thread_limit);
+ /// \brief Call to void __kmpc_kernel_init(kmp_int32 thread_limit);
OMPRTL_NVPTX__kmpc_kernel_init,
-};
-
-// NVPTX Address space
-enum AddressSpace {
- AddressSpaceShared = 3,
+ /// \brief Call to void __kmpc_kernel_deinit();
+ OMPRTL_NVPTX__kmpc_kernel_deinit,
};
} // namespace
@@ -70,6 +66,15 @@ static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
/// Synchronize all GPU threads in a block.
static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
+/// Get the value of the thread_limit clause in the teams directive.
+/// The runtime encodes thread_limit in the launch parameter, always starting
+/// thread_limit+warpSize threads per team.
+static llvm::Value *getThreadLimit(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
+ "thread_limit");
+}
+
/// Get the thread id of the OMP master thread.
/// The master thread id is the first thread (lane) of the last warp in the
/// GPU block. Warp size is assumed to be some power of 2.
@@ -103,35 +108,105 @@ void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
CGM.getTypes().GetFunctionType(*CGFI), llvm::GlobalValue::InternalLinkage,
/* placeholder */ "_worker", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI);
- WorkerFn->setLinkage(llvm::GlobalValue::InternalLinkage);
- WorkerFn->addFnAttr(llvm::Attribute::NoInline);
}
-void CGOpenMPRuntimeNVPTX::initializeEnvironment() {
- //
- // Initialize master-worker control state in shared memory.
- //
+void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D,
+ StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen) {
+ EntryFunctionState EST;
+ WorkerFunctionState WST(CGM);
+
+ // Emit target region as a standalone region.
+ class NVPTXPrePostActionTy : public PrePostActionTy {
+ CGOpenMPRuntimeNVPTX &RT;
+ CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
+ CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;
+
+ public:
+ NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
+ CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
+ CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
+ : RT(RT), EST(EST), WST(WST) {}
+ void Enter(CodeGenFunction &CGF) override {
+ RT.emitGenericEntryHeader(CGF, EST, WST);
+ }
+ void Exit(CodeGenFunction &CGF) override {
+ RT.emitGenericEntryFooter(CGF, EST);
+ }
+ } Action(*this, EST, WST);
+ CodeGen.setAction(Action);
+ emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
+ IsOffloadEntry, CodeGen);
- auto DL = CGM.getDataLayout();
- ActiveWorkers = new llvm::GlobalVariable(
- CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false,
- llvm::GlobalValue::CommonLinkage,
- llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0,
- llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
- ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty));
-
- WorkID = new llvm::GlobalVariable(
- CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false,
- llvm::GlobalValue::CommonLinkage,
- llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0,
- llvm::GlobalVariable::NotThreadLocal, AddressSpaceShared);
- WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty));
+ // Create the worker function
+ emitWorkerFunction(WST);
+
+ // Now change the name of the worker function to correspond to this target
+ // region's entry function.
+ WST.WorkerFn->setName(OutlinedFn->getName() + "_worker");
+}
+
+// Setup NVPTX threads for master-worker OpenMP scheme.
+void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF,
+ EntryFunctionState &EST,
+ WorkerFunctionState &WST) {
+ CGBuilderTy &Bld = CGF.Builder;
+
+ llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker");
+ llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
+ llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
+ EST.ExitBB = CGF.createBasicBlock(".exit");
+
+ auto *IsWorker =
+ Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF));
+ Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
+
+ CGF.EmitBlock(WorkerBB);
+ CGF.EmitCallOrInvoke(WST.WorkerFn, llvm::None);
+ CGF.EmitBranch(EST.ExitBB);
+
+ CGF.EmitBlock(MasterCheckBB);
+ auto *IsMaster =
+ Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
+ Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB);
+
+ CGF.EmitBlock(MasterBB);
+ // First action in sequential region:
+ // Initialize the state of the OpenMP runtime library on the GPU.
+ llvm::Value *Args[] = {getThreadLimit(CGF)};
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
+}
+
+void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF,
+ EntryFunctionState &EST) {
+ if (!EST.ExitBB)
+ EST.ExitBB = CGF.createBasicBlock(".exit");
+
+ llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier");
+ CGF.EmitBranch(TerminateBB);
+
+ CGF.EmitBlock(TerminateBB);
+ // Signal termination condition.
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), None);
+ // Barrier to terminate worker threads.
+ syncCTAThreads(CGF);
+ // Master thread jumps to exit point.
+ CGF.EmitBranch(EST.ExitBB);
+
+ CGF.EmitBlock(EST.ExitBB);
+ EST.ExitBB = nullptr;
}
void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
auto &Ctx = CGM.getContext();
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
+ CGF.disableDebugInfo();
CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {});
emitWorkerLoop(CGF, WST);
CGF.FinishFunction();
@@ -163,21 +238,26 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
CGF.EmitBlock(AwaitBB);
// Wait for parallel work
syncCTAThreads(CGF);
+
+ Address WorkFn =
+ CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrTy, /*Name=*/"work_fn");
+ Address ExecStatus =
+ CGF.CreateDefaultAlignTempAlloca(CGF.Int8Ty, /*Name=*/"exec_status");
+ CGF.InitTempAlloca(ExecStatus, Bld.getInt8(/*C=*/0));
+ CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy));
+
+ // TODO: Call into runtime to get parallel work.
+
// On termination condition (work_fn == 0), exit loop.
- llvm::Value *ShouldTerminate = Bld.CreateICmpEQ(
- Bld.CreateAlignedLoad(WorkID, WorkID->getAlignment()),
- llvm::Constant::getNullValue(WorkID->getType()->getElementType()),
- "should_terminate");
+ llvm::Value *ShouldTerminate =
+ Bld.CreateIsNull(Bld.CreateLoad(WorkFn), "should_terminate");
Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);
// Activate requested workers.
CGF.EmitBlock(SelectWorkersBB);
- llvm::Value *ThreadID = getNVPTXThreadID(CGF);
- llvm::Value *ActiveThread = Bld.CreateICmpSLT(
- ThreadID,
- Bld.CreateAlignedLoad(ActiveWorkers, ActiveWorkers->getAlignment()),
- "active_thread");
- Bld.CreateCondBr(ActiveThread, ExecuteBB, BarrierBB);
+ llvm::Value *IsActive =
+ Bld.CreateIsNotNull(Bld.CreateLoad(ExecStatus), "is_active");
+ Bld.CreateCondBr(IsActive, ExecuteBB, BarrierBB);
// Signal start of parallel region.
CGF.EmitBlock(ExecuteBB);
@@ -197,72 +277,6 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
CGF.EmitBlock(ExitBB);
}
-// Setup NVPTX threads for master-worker OpenMP scheme.
-void CGOpenMPRuntimeNVPTX::emitEntryHeader(CodeGenFunction &CGF,
- EntryFunctionState &EST,
- WorkerFunctionState &WST) {
- CGBuilderTy &Bld = CGF.Builder;
-
- // Get the master thread id.
- llvm::Value *MasterID = getMasterThreadID(CGF);
- // Current thread's identifier.
- llvm::Value *ThreadID = getNVPTXThreadID(CGF);
-
- // Setup BBs in entry function.
- llvm::BasicBlock *WorkerCheckBB = CGF.createBasicBlock(".check.for.worker");
- llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker");
- llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
- EST.ExitBB = CGF.createBasicBlock(".exit");
-
- // The head (master thread) marches on while its body of companion threads in
- // the warp go to sleep.
- llvm::Value *ShouldDie =
- Bld.CreateICmpUGT(ThreadID, MasterID, "excess_in_master_warp");
- Bld.CreateCondBr(ShouldDie, EST.ExitBB, WorkerCheckBB);
-
- // Select worker threads...
- CGF.EmitBlock(WorkerCheckBB);
- llvm::Value *IsWorker = Bld.CreateICmpULT(ThreadID, MasterID, "is_worker");
- Bld.CreateCondBr(IsWorker, WorkerBB, MasterBB);
-
- // ... and send to worker loop, awaiting parallel invocation.
- CGF.EmitBlock(WorkerBB);
- CGF.EmitCallOrInvoke(WST.WorkerFn, llvm::None);
- CGF.EmitBranch(EST.ExitBB);
-
- // Only master thread executes subsequent serial code.
- CGF.EmitBlock(MasterBB);
-
- // First action in sequential region:
- // Initialize the state of the OpenMP runtime library on the GPU.
- llvm::Value *Args[] = {Bld.getInt32(/*OmpHandle=*/0), getNVPTXThreadID(CGF)};
- CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init),
- Args);
-}
-
-void CGOpenMPRuntimeNVPTX::emitEntryFooter(CodeGenFunction &CGF,
- EntryFunctionState &EST) {
- if (!EST.ExitBB)
- EST.ExitBB = CGF.createBasicBlock(".exit");
-
- CGBuilderTy &Bld = CGF.Builder;
- llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier");
- CGF.EmitBranch(TerminateBB);
-
- CGF.EmitBlock(TerminateBB);
- // Signal termination condition.
- Bld.CreateAlignedStore(
- llvm::Constant::getNullValue(WorkID->getType()->getElementType()), WorkID,
- WorkID->getAlignment());
- // Barrier to terminate worker threads.
- syncCTAThreads(CGF);
- // Master thread jumps to exit point.
- CGF.EmitBranch(EST.ExitBB);
-
- CGF.EmitBlock(EST.ExitBB);
- EST.ExitBB = nullptr;
-}
-
/// \brief Returns specified OpenMP runtime function for the current OpenMP
/// implementation. Specialized for the NVPTX device.
/// \param Function OpenMP runtime function.
@@ -272,21 +286,27 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
llvm::Constant *RTLFn = nullptr;
switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
case OMPRTL_NVPTX__kmpc_kernel_init: {
- // Build void __kmpc_kernel_init(kmp_int32 omp_handle,
- // kmp_int32 thread_limit);
- llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int32Ty};
+ // Build void __kmpc_kernel_init(kmp_int32 thread_limit);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
break;
}
+ case OMPRTL_NVPTX__kmpc_kernel_deinit: {
+ // Build void __kmpc_kernel_deinit();
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, {}, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
+ break;
+ }
}
return RTLFn;
}
void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID,
llvm::Constant *Addr,
- uint64_t Size) {
+ uint64_t Size, int32_t) {
auto *F = dyn_cast<llvm::Function>(Addr);
// TODO: Add support for global variables on the device after declare target
// support.
@@ -315,44 +335,14 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
assert(!ParentName.empty() && "Invalid target region parent name!");
- EntryFunctionState EST;
- WorkerFunctionState WST(CGM);
-
- // Emit target region as a standalone region.
- class NVPTXPrePostActionTy : public PrePostActionTy {
- CGOpenMPRuntimeNVPTX &RT;
- CGOpenMPRuntimeNVPTX::EntryFunctionState &EST;
- CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST;
-
- public:
- NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT,
- CGOpenMPRuntimeNVPTX::EntryFunctionState &EST,
- CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
- : RT(RT), EST(EST), WST(WST) {}
- void Enter(CodeGenFunction &CGF) override {
- RT.emitEntryHeader(CGF, EST, WST);
- }
- void Exit(CodeGenFunction &CGF) override { RT.emitEntryFooter(CGF, EST); }
- } Action(*this, EST, WST);
- CodeGen.setAction(Action);
- emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
- IsOffloadEntry, CodeGen);
-
- // Create the worker function
- emitWorkerFunction(WST);
-
- // Now change the name of the worker function to correspond to this target
- // region's entry function.
- WST.WorkerFn->setName(OutlinedFn->getName() + "_worker");
+ emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+ CodeGen);
}
CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
- : CGOpenMPRuntime(CGM), ActiveWorkers(nullptr), WorkID(nullptr) {
+ : CGOpenMPRuntime(CGM) {
if (!CGM.getLangOpts().OpenMPIsDevice)
llvm_unreachable("OpenMP NVPTX can only handle device code.");
-
- // Called once per module during initialization.
- initializeEnvironment();
}
void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
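
The generic kernel scheme emitted by emitGenericEntryHeader/emitGenericEntryFooter splits the thread block into thread_limit workers, one master thread (the first lane of the last warp), and an idle remainder. A self-contained C++ sketch of that control flow; every name below is a stubbed stand-in for the corresponding PTX special register or runtime call:

    // Stand-ins so the sketch compiles; in the generated code these are the
    // %tid/%ntid reads, bar.sync, and the device runtime entry points.
    static unsigned ThreadIdx = 0;          // getNVPTXThreadID, per thread
    static unsigned BlockDim = 128;         // getNVPTXNumThreads
    static const unsigned WarpSize = 32;    // getNVPTXWarpSize
    static void syncThreads() {}            // syncCTAThreads
    static void workerLoop() {}             // the emitted *_worker function
    static void kmpcKernelInit(int) {}      // __kmpc_kernel_init(thread_limit)
    static void kmpcKernelDeinit() {}       // __kmpc_kernel_deinit()
    static void sequentialRegion() {}       // the target region body

    void genericKernelSkeleton() {
      unsigned ThreadLimit = BlockDim - WarpSize;            // getThreadLimit
      unsigned MasterID = (BlockDim - 1) & ~(WarpSize - 1);  // getMasterThreadID
      if (ThreadIdx < ThreadLimit) {     // .worker
        workerLoop();
        return;                          // branch to .exit
      }
      if (ThreadIdx != MasterID)         // .mastercheck
        return;                          // surplus master-warp threads exit
      kmpcKernelInit((int)ThreadLimit);  // .master: init runtime state
      sequentialRegion();
      kmpcKernelDeinit();                // .termination.notifier
      syncThreads();                     // barrier releases the workers
    }                                    // .exit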
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index a33fb27579f6..63a02965a5bd 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -24,7 +24,7 @@ namespace clang {
namespace CodeGen {
class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
-public:
+private:
struct EntryFunctionState {
llvm::BasicBlock *ExitBB = nullptr;
};
@@ -40,34 +40,21 @@ public:
void createWorkerFunction(CodeGenModule &CGM);
};
- /// \brief Helper for target entry function. Guide the master and worker
- /// threads to their respective locations.
- void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
- WorkerFunctionState &WST);
-
- /// \brief Signal termination of OMP execution.
- void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
-
-private:
- //
- // Private state and methods.
- //
-
- // Master-worker control state.
- // Number of requested OMP threads in parallel region.
- llvm::GlobalVariable *ActiveWorkers;
- // Outlined function for the workers to execute.
- llvm::GlobalVariable *WorkID;
-
- /// \brief Initialize master-worker control state.
- void initializeEnvironment();
-
/// \brief Emit the worker function for the current target region.
void emitWorkerFunction(WorkerFunctionState &WST);
/// \brief Helper for worker function. Emit body of worker loop.
void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
+ /// \brief Helper for generic target entry function. Guide the master and
+ /// worker threads to their respective locations.
+ void emitGenericEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
+ WorkerFunctionState &WST);
+
+ /// \brief Signal termination of OMP execution for generic target entry
+ /// function.
+ void emitGenericEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
+
/// \brief Returns specified OpenMP runtime function for the current OpenMP
/// implementation. Specialized for the NVPTX device.
/// \param Function OpenMP runtime function.
@@ -79,9 +66,23 @@ private:
//
/// \brief Creates offloading entry for the provided entry ID \a ID,
- /// address \a Addr and size \a Size.
+ /// address \a Addr, size \a Size, and flags \a Flags.
void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
- uint64_t Size) override;
+ uint64_t Size, int32_t Flags = 0) override;
+
+ /// \brief Emit outlined function specialized for the Fork-Join
+ /// programming model for applicable target directives on the NVPTX device.
+ /// \param D Directive to emit.
+ /// \param ParentName Name of the function that encloses the target region.
+ /// \param OutlinedFn Outlined function value to be defined by this call.
+ /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+ /// \param IsOffloadEntry True if the outlined function is an offload entry.
+ /// An outlined function may not be an entry if, e.g., the 'if' clause always
+ /// evaluates to false.
+ void emitGenericKernel(const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen);
/// \brief Emit outlined function for 'target' directive on the NVPTX
/// device.
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index 1e17918df4a4..5f74141d75b3 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -44,6 +44,7 @@ namespace clang {
virtual void anchor();
DiagnosticsEngine &Diags;
BackendAction Action;
+ const HeaderSearchOptions &HeaderSearchOpts;
const CodeGenOptions &CodeGenOpts;
const TargetOptions &TargetOpts;
const LangOptions &LangOpts;
@@ -77,8 +78,8 @@ namespace clang {
const SmallVectorImpl<std::pair<unsigned, llvm::Module *>> &LinkModules,
std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
CoverageSourceInfo *CoverageInfo = nullptr)
- : Diags(Diags), Action(Action), CodeGenOpts(CodeGenOpts),
- TargetOpts(TargetOpts), LangOpts(LangOpts),
+ : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
+ CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
AsmOutStream(std::move(OS)), Context(nullptr),
LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
LLVMIRGenerationRefCount(0),
@@ -225,8 +226,8 @@ namespace clang {
EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef());
- EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts,
- C.getTargetInfo().getDataLayout(),
+ EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts,
+ LangOpts, C.getTargetInfo().getDataLayout(),
getModule(), Action, std::move(AsmOutStream));
Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext);
@@ -898,9 +899,10 @@ void CodeGenAction::ExecuteAction() {
Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler,
&CI.getDiagnostics());
- EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts,
- CI.getLangOpts(), CI.getTarget().getDataLayout(),
- TheModule.get(), BA, std::move(OS));
+ EmitBackendOutput(CI.getDiagnostics(), CI.getHeaderSearchOpts(),
+ CI.getCodeGenOpts(), TargetOpts, CI.getLangOpts(),
+ CI.getTarget().getDataLayout(), TheModule.get(), BA,
+ std::move(OS));
return;
}
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 1347f54df9ac..05522cd40024 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -120,7 +120,7 @@ enum TypeEvaluationKind {
SANITIZER_CHECK(OutOfBounds, out_of_bounds, 0) \
SANITIZER_CHECK(ShiftOutOfBounds, shift_out_of_bounds, 0) \
SANITIZER_CHECK(SubOverflow, sub_overflow, 0) \
- SANITIZER_CHECK(TypeMismatch, type_mismatch, 0) \
+ SANITIZER_CHECK(TypeMismatch, type_mismatch, 1) \
SANITIZER_CHECK(VLABoundNotPositive, vla_bound_not_positive, 0)
enum SanitizerHandler {
diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index baf7811eedaf..754f9968b67f 100644
--- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -282,7 +282,7 @@ public:
// Print the IR for the PCH container to the debug output.
llvm::SmallString<0> Buffer;
clang::EmitBackendOutput(
- Diags, CodeGenOpts, TargetOpts, LangOpts,
+ Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts,
Ctx.getTargetInfo().getDataLayout(), M.get(),
BackendAction::Backend_EmitLL,
llvm::make_unique<llvm::raw_svector_ostream>(Buffer));
@@ -290,9 +290,10 @@ public:
});
// Use the LLVM backend to emit the pch container.
- clang::EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts,
- Ctx.getTargetInfo().getDataLayout(), M.get(),
- BackendAction::Backend_EmitObj, std::move(OS));
+ clang::EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts,
+ LangOpts, Ctx.getTargetInfo().getDataLayout(),
+ M.get(), BackendAction::Backend_EmitObj,
+ std::move(OS));
// Free the memory for the temporary buffer.
llvm::SmallVector<char, 0> Empty;
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 391eb53d2500..d2fc3888ef29 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -871,6 +871,14 @@ static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
return NumMembers <= 4;
}
+/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
+static ABIArgInfo getDirectX86Hva(llvm::Type *T = nullptr) {
+ auto AI = ABIArgInfo::getDirect(T);
+ AI.setInReg(true);
+ AI.setCanBeFlattened(false);
+ return AI;
+}
+
//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//
@@ -884,6 +892,11 @@ struct CCState {
unsigned FreeSSERegs;
};
+enum {
+ // Vectorcall only allows the first 6 parameters to be passed in registers.
+ VectorcallMaxParamNumAsReg = 6
+};
+
/// X86_32ABIInfo - The X86-32 ABI information.
class X86_32ABIInfo : public SwiftABIInfo {
enum Class {
@@ -929,6 +942,8 @@ class X86_32ABIInfo : public SwiftABIInfo {
Class classify(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
+ ABIArgInfo reclassifyHvaArgType(QualType RetTy, CCState &State,
+ const ABIArgInfo& current) const;
/// \brief Updates the number of available free registers, returns
/// true if any registers were allocated.
bool updateFreeRegs(QualType Ty, CCState &State) const;
@@ -946,6 +961,8 @@ class X86_32ABIInfo : public SwiftABIInfo {
void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
CharUnits &StackOffset, ABIArgInfo &Info,
QualType Type) const;
+ void computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
+ bool &UsedInAlloca) const;
public:
@@ -1494,6 +1511,27 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
return true;
}
+ABIArgInfo
+X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State,
+ const ABIArgInfo &current) const {
+ // Assumes the vectorcall calling convention.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+
+ if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
+ isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (State.FreeSSERegs >= NumElts) {
+ // HVA types get passed directly in registers if there is room.
+ State.FreeSSERegs -= NumElts;
+ return getDirectX86Hva();
+ }
+ // If there's no room, the HVA gets passed as a normal indirect
+ // structure.
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
+ }
+ return current;
+}
+
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// FIXME: Set alignment on indirect arguments.
@@ -1513,19 +1551,34 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
}
// vectorcall adds the concept of a homogeneous vector aggregate, similar
- // to other targets.
+ // to other targets; regcall uses some of the HVA rules.
const Type *Base = nullptr;
uint64_t NumElts = 0;
if ((State.CC == llvm::CallingConv::X86_VectorCall ||
State.CC == llvm::CallingConv::X86_RegCall) &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (State.FreeSSERegs >= NumElts) {
- State.FreeSSERegs -= NumElts;
- if (Ty->isBuiltinType() || Ty->isVectorType())
+
+ if (State.CC == llvm::CallingConv::X86_RegCall) {
+ if (State.FreeSSERegs >= NumElts) {
+ State.FreeSSERegs -= NumElts;
+ if (Ty->isBuiltinType() || Ty->isVectorType())
+ return ABIArgInfo::getDirect();
+ return ABIArgInfo::getExpand();
+
+ }
+ return getIndirectResult(Ty, /*ByVal=*/false, State);
+ } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
+ if (State.FreeSSERegs >= NumElts &&
+ (Ty->isBuiltinType() || Ty->isVectorType())) {
+ // Actual floating-point and vector types get registers the first time
+ // through if there are registers available.
+ State.FreeSSERegs -= NumElts;
return ABIArgInfo::getDirect();
- return ABIArgInfo::getExpand();
+ } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
+ // HVA types only get registers after everything else has been
+ // classified, so it is marked indirect for now.
+ return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty));
+ }
}
- return getIndirectResult(Ty, /*ByVal=*/false, State);
}
if (isAggregateTypeForABI(Ty)) {
@@ -1604,6 +1657,36 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
return ABIArgInfo::getDirect();
}
+void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
+ bool &UsedInAlloca) const {
+ // Vectorcall only allows the first 6 parameters to be passed in registers,
+ // and homogeneous vector aggregates are only put into registers as a second
+ // priority.
+ unsigned Count = 0;
+ CCState ZeroState = State;
+ ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0;
+ // HVAs must be done as a second priority for registers, so the deferred
+ // items are dealt with by going through the pattern a second time.
+ for (auto &I : FI.arguments()) {
+ if (Count < VectorcallMaxParamNumAsReg)
+ I.info = classifyArgumentType(I.type, State);
+ else
+ // Parameters after the 6th cannot be passed in registers,
+ // so pretend there are no registers left for them.
+ I.info = classifyArgumentType(I.type, ZeroState);
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ ++Count;
+ }
+ Count = 0;
+ // Go through the arguments a second time to give HVAs registers if there
+ // are still some available.
+ for (auto &I : FI.arguments()) {
+ if (Count < VectorcallMaxParamNumAsReg)
+ I.info = reclassifyHvaArgType(I.type, State, I.info);
+ ++Count;
+ }
+}
+
void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
CCState State(FI.getCallingConvention());
if (IsMCUABI)
@@ -1638,9 +1721,14 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
++State.FreeRegs;
bool UsedInAlloca = false;
- for (auto &I : FI.arguments()) {
- I.info = classifyArgumentType(I.type, State);
- UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ if (State.CC == llvm::CallingConv::X86_VectorCall) {
+ computeVectorCallArgs(FI, State, UsedInAlloca);
+ } else {
+ // If not vectorcall, revert to normal behavior.
+ for (auto &I : FI.arguments()) {
+ I.info = classifyArgumentType(I.type, State);
+ UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+ }
}
// If we needed to use inalloca for any argument, do a second pass and rewrite
@@ -2070,10 +2158,14 @@ public:
}
private:
- ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs,
- bool IsReturnType) const;
-
- bool IsMingw64;
+ ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
+ bool IsVectorCall, bool IsRegCall) const;
+ ABIArgInfo reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs,
+ const ABIArgInfo &current) const;
+ void computeVectorCallArgs(CGFunctionInfo &FI, unsigned FreeSSERegs,
+ bool IsVectorCall, bool IsRegCall) const;
+
+ bool IsMingw64;
};
class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -3679,8 +3771,24 @@ Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
/*allowHigherAlign*/ false);
}
+ABIArgInfo
+WinX86_64ABIInfo::reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs,
+ const ABIArgInfo &current) const {
+ // Assumes the vectorcall calling convention.
+ const Type *Base = nullptr;
+ uint64_t NumElts = 0;
+
+ if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
+ isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
+ FreeSSERegs -= NumElts;
+ return getDirectX86Hva();
+ }
+ return current;
+}
+
ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
- bool IsReturnType) const {
+ bool IsReturnType, bool IsVectorCall,
+ bool IsRegCall) const {
if (Ty->isVoidType())
return ABIArgInfo::getIgnore();
@@ -3704,21 +3812,34 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
}
- // vectorcall adds the concept of a homogenous vector aggregate, similar to
- // other targets.
const Type *Base = nullptr;
uint64_t NumElts = 0;
- if (FreeSSERegs && isHomogeneousAggregate(Ty, Base, NumElts)) {
- if (FreeSSERegs >= NumElts) {
- FreeSSERegs -= NumElts;
- if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
+ // vectorcall adds the concept of a homogeneous vector aggregate, similar to
+ // other targets.
+ if ((IsVectorCall || IsRegCall) &&
+ isHomogeneousAggregate(Ty, Base, NumElts)) {
+ if (IsRegCall) {
+ if (FreeSSERegs >= NumElts) {
+ FreeSSERegs -= NumElts;
+ if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
+ return ABIArgInfo::getDirect();
+ return ABIArgInfo::getExpand();
+ }
+ return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+ } else if (IsVectorCall) {
+ if (FreeSSERegs >= NumElts &&
+ (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
+ FreeSSERegs -= NumElts;
return ABIArgInfo::getDirect();
- return ABIArgInfo::getExpand();
+ } else if (IsReturnType) {
+ return ABIArgInfo::getExpand();
+ } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
+ // HVAs are deferred and reclassified in the second pass.
+ return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+ }
}
- return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
}
-
if (Ty->isMemberPointerType()) {
// If the member pointer is represented by an LLVM int or ptr, pass it
// directly.
@@ -3754,6 +3875,32 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
return ABIArgInfo::getDirect();
}
+void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI,
+ unsigned FreeSSERegs,
+ bool IsVectorCall,
+ bool IsRegCall) const {
+ unsigned Count = 0;
+ for (auto &I : FI.arguments()) {
+ if (Count < VectorcallMaxParamNumAsReg)
+ I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
+ else {
+ // Since these cannot be passed in registers, pretend no registers
+ // are left.
+ unsigned ZeroSSERegsAvail = 0;
+ I.info = classify(I.type, /*FreeSSERegs=*/ZeroSSERegsAvail, false,
+ IsVectorCall, IsRegCall);
+ }
+ ++Count;
+ }
+
+ Count = 0;
+ for (auto &I : FI.arguments()) {
+ if (Count < VectorcallMaxParamNumAsReg)
+ I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
+ ++Count;
+ }
+}
+
void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
bool IsVectorCall =
FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall;
@@ -3769,17 +3916,24 @@ void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
}
if (!getCXXABI().classifyReturnType(FI))
- FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true);
+ FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
+ IsVectorCall, IsRegCall);
if (IsVectorCall) {
// We can use up to 6 SSE register parameters with vectorcall.
FreeSSERegs = 6;
} else if (IsRegCall) {
+ // RegCall gives us 16 SSE registers; we can reuse the return registers.
FreeSSERegs = 16;
}
- for (auto &I : FI.arguments())
- I.info = classify(I.type, FreeSSERegs, false);
+ if (IsVectorCall) {
+ computeVectorCallArgs(FI, FreeSSERegs, IsVectorCall, IsRegCall);
+ } else {
+ for (auto &I : FI.arguments())
+ I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
+ }
+
}
Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
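
Both the X86-32 and Win64 ABIs now classify vectorcall arguments in two passes: the first pass hands SSE registers to plain builtin/vector types among the first six parameters while HVAs are provisionally left indirect, and the second pass lets the deferred HVAs claim whatever registers remain. A self-contained simulation of that bookkeeping, with simplified stand-in types:

    #include <cstdint>
    #include <vector>

    enum class PassKind { Direct, Indirect, DirectHva };

    struct Arg {
      uint64_t NumElts;  // SSE registers the type would occupy
      bool IsHva;        // homogeneous vector aggregate?
      PassKind Kind = PassKind::Indirect;
    };

    // Vectorcall only allows the first 6 parameters to be passed in registers.
    constexpr unsigned VectorcallMaxParamNumAsReg = 6;

    void classifyVectorCall(std::vector<Arg> &Args, uint64_t FreeSSERegs) {
      // Pass 1: non-HVA builtin/vector types claim registers first; HVAs and
      // everything past the 6th parameter stay indirect for now.
      for (unsigned I = 0; I < Args.size(); ++I) {
        Arg &A = Args[I];
        if (I < VectorcallMaxParamNumAsReg && !A.IsHva &&
            FreeSSERegs >= A.NumElts) {
          FreeSSERegs -= A.NumElts;
          A.Kind = PassKind::Direct;
        }
      }
      // Pass 2: deferred HVAs among the first 6 parameters take whatever SSE
      // registers are left, in parameter order.
      for (unsigned I = 0; I < Args.size() && I < VectorcallMaxParamNumAsReg;
           ++I) {
        Arg &A = Args[I];
        if (A.IsHva && FreeSSERegs >= A.NumElts) {
          FreeSSERegs -= A.NumElts;
          A.Kind = PassKind::DirectHva;
        }
      }
    }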