author     Dimitry Andric <dim@FreeBSD.org>  2023-12-09 13:28:42 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2023-12-09 13:28:42 +0000
commit     b1c73532ee8997fe5dfbeb7d223027bdf99758a0 (patch)
tree       7d6e51c294ab6719475d660217aa0c0ad0526292 /clang/lib/CodeGen
parent     7fa27ce4a07f19b07799a767fc29416f3b625afb (diff)
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--  clang/lib/CodeGen/ABIInfoImpl.cpp | 13
-rw-r--r--  clang/lib/CodeGen/ABIInfoImpl.h | 14
-rw-r--r--  clang/lib/CodeGen/BackendConsumer.h | 166
-rw-r--r--  clang/lib/CodeGen/BackendUtil.cpp | 219
-rw-r--r--  clang/lib/CodeGen/CGAtomic.cpp | 308
-rw-r--r--  clang/lib/CodeGen/CGBlocks.cpp | 100
-rw-r--r--  clang/lib/CodeGen/CGBuilder.h | 17
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp | 1837
-rw-r--r--  clang/lib/CodeGen/CGCUDANV.cpp | 91
-rw-r--r--  clang/lib/CodeGen/CGCUDARuntime.h | 14
-rw-r--r--  clang/lib/CodeGen/CGCXXABI.cpp | 11
-rw-r--r--  clang/lib/CodeGen/CGCall.cpp | 386
-rw-r--r--  clang/lib/CodeGen/CGCall.h | 54
-rw-r--r--  clang/lib/CodeGen/CGClass.cpp | 161
-rw-r--r--  clang/lib/CodeGen/CGCleanup.cpp | 14
-rw-r--r--  clang/lib/CodeGen/CGCoroutine.cpp | 95
-rw-r--r--  clang/lib/CodeGen/CGDebugInfo.cpp | 150
-rw-r--r--  clang/lib/CodeGen/CGDebugInfo.h | 16
-rw-r--r--  clang/lib/CodeGen/CGDecl.cpp | 64
-rw-r--r--  clang/lib/CodeGen/CGDeclCXX.cpp | 57
-rw-r--r--  clang/lib/CodeGen/CGException.cpp | 69
-rw-r--r--  clang/lib/CodeGen/CGExpr.cpp | 390
-rw-r--r--  clang/lib/CodeGen/CGExprCXX.cpp | 43
-rw-r--r--  clang/lib/CodeGen/CGExprComplex.cpp | 4
-rw-r--r--  clang/lib/CodeGen/CGExprConstant.cpp | 82
-rw-r--r--  clang/lib/CodeGen/CGExprScalar.cpp | 80
-rw-r--r--  clang/lib/CodeGen/CGGPUBuiltin.cpp | 15
-rw-r--r--  clang/lib/CodeGen/CGHLSLRuntime.cpp | 59
-rw-r--r--  clang/lib/CodeGen/CGLoopInfo.cpp | 24
-rw-r--r--  clang/lib/CodeGen/CGLoopInfo.h | 6
-rw-r--r--  clang/lib/CodeGen/CGNonTrivialStruct.cpp | 4
-rw-r--r--  clang/lib/CodeGen/CGObjC.cpp | 37
-rw-r--r--  clang/lib/CodeGen/CGObjCGNU.cpp | 186
-rw-r--r--  clang/lib/CodeGen/CGObjCMac.cpp | 145
-rw-r--r--  clang/lib/CodeGen/CGObjCRuntime.cpp | 4
-rw-r--r--  clang/lib/CodeGen/CGOpenCLRuntime.cpp | 50
-rw-r--r--  clang/lib/CodeGen/CGOpenCLRuntime.h | 3
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntime.cpp | 1170
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntime.h | 77
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 516
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntimeGPU.h | 40
-rw-r--r--  clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 11
-rw-r--r--  clang/lib/CodeGen/CGStmt.cpp | 44
-rw-r--r--  clang/lib/CodeGen/CGStmtOpenMP.cpp | 44
-rw-r--r--  clang/lib/CodeGen/CGVTT.cpp | 5
-rw-r--r--  clang/lib/CodeGen/CGVTables.cpp | 82
-rw-r--r--  clang/lib/CodeGen/CodeGenABITypes.cpp | 5
-rw-r--r--  clang/lib/CodeGen/CodeGenAction.cpp | 647
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.cpp | 115
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.h | 66
-rw-r--r--  clang/lib/CodeGen/CodeGenModule.cpp | 478
-rw-r--r--  clang/lib/CodeGen/CodeGenModule.h | 120
-rw-r--r--  clang/lib/CodeGen/CodeGenPGO.cpp | 21
-rw-r--r--  clang/lib/CodeGen/CodeGenTBAA.cpp | 33
-rw-r--r--  clang/lib/CodeGen/CodeGenTypeCache.h | 7
-rw-r--r--  clang/lib/CodeGen/CodeGenTypes.cpp | 7
-rw-r--r--  clang/lib/CodeGen/CodeGenTypes.h | 16
-rw-r--r--  clang/lib/CodeGen/CoverageMappingGen.cpp | 28
-rw-r--r--  clang/lib/CodeGen/CoverageMappingGen.h | 4
-rw-r--r--  clang/lib/CodeGen/EHScopeStack.h | 4
-rw-r--r--  clang/lib/CodeGen/ItaniumCXXABI.cpp | 122
-rw-r--r--  clang/lib/CodeGen/LinkInModulesPass.cpp | 29
-rw-r--r--  clang/lib/CodeGen/LinkInModulesPass.h | 42
-rw-r--r--  clang/lib/CodeGen/MicrosoftCXXABI.cpp | 103
-rw-r--r--  clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp | 3
-rw-r--r--  clang/lib/CodeGen/SwiftCallingConv.cpp | 7
-rw-r--r--  clang/lib/CodeGen/TargetInfo.cpp | 2
-rw-r--r--  clang/lib/CodeGen/TargetInfo.h | 3
-rw-r--r--  clang/lib/CodeGen/Targets/AArch64.cpp | 19
-rw-r--r--  clang/lib/CodeGen/Targets/AMDGPU.cpp | 159
-rw-r--r--  clang/lib/CodeGen/Targets/LoongArch.cpp | 33
-rw-r--r--  clang/lib/CodeGen/Targets/NVPTX.cpp | 71
-rw-r--r--  clang/lib/CodeGen/Targets/PPC.cpp | 23
-rw-r--r--  clang/lib/CodeGen/Targets/RISCV.cpp | 28
-rw-r--r--  clang/lib/CodeGen/Targets/Sparc.cpp | 2
-rw-r--r--  clang/lib/CodeGen/Targets/X86.cpp | 66
-rw-r--r--  clang/lib/CodeGen/Targets/XCore.cpp | 2
77 files changed, 5188 insertions(+), 4054 deletions(-)
diff --git a/clang/lib/CodeGen/ABIInfoImpl.cpp b/clang/lib/CodeGen/ABIInfoImpl.cpp
index 7c30cecfdb9b..2b20d5a13346 100644
--- a/clang/lib/CodeGen/ABIInfoImpl.cpp
+++ b/clang/lib/CodeGen/ABIInfoImpl.cpp
@@ -246,7 +246,7 @@ Address CodeGen::emitMergePHI(CodeGenFunction &CGF, Address Addr1,
}
bool CodeGen::isEmptyField(ASTContext &Context, const FieldDecl *FD,
- bool AllowArrays) {
+ bool AllowArrays, bool AsIfNoUniqueAddr) {
if (FD->isUnnamedBitfield())
return true;
@@ -280,13 +280,14 @@ bool CodeGen::isEmptyField(ASTContext &Context, const FieldDecl *FD,
// not arrays of records, so we must also check whether we stripped off an
// array type above.
if (isa<CXXRecordDecl>(RT->getDecl()) &&
- (WasArray || !FD->hasAttr<NoUniqueAddressAttr>()))
+ (WasArray || (!AsIfNoUniqueAddr && !FD->hasAttr<NoUniqueAddressAttr>())))
return false;
- return isEmptyRecord(Context, FT, AllowArrays);
+ return isEmptyRecord(Context, FT, AllowArrays, AsIfNoUniqueAddr);
}
-bool CodeGen::isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) {
+bool CodeGen::isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays,
+ bool AsIfNoUniqueAddr) {
const RecordType *RT = T->getAs<RecordType>();
if (!RT)
return false;
@@ -297,11 +298,11 @@ bool CodeGen::isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) {
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
for (const auto &I : CXXRD->bases())
- if (!isEmptyRecord(Context, I.getType(), true))
+ if (!isEmptyRecord(Context, I.getType(), true, AsIfNoUniqueAddr))
return false;
for (const auto *I : RD->fields())
- if (!isEmptyField(Context, I, AllowArrays))
+ if (!isEmptyField(Context, I, AllowArrays, AsIfNoUniqueAddr))
return false;
return true;
}
diff --git a/clang/lib/CodeGen/ABIInfoImpl.h b/clang/lib/CodeGen/ABIInfoImpl.h
index 5f0cc289af68..afde08ba100c 100644
--- a/clang/lib/CodeGen/ABIInfoImpl.h
+++ b/clang/lib/CodeGen/ABIInfoImpl.h
@@ -122,13 +122,19 @@ Address emitMergePHI(CodeGenFunction &CGF, Address Addr1,
llvm::BasicBlock *Block2, const llvm::Twine &Name = "");
/// isEmptyField - Return true iff a the field is "empty", that is it
-/// is an unnamed bit-field or an (array of) empty record(s).
-bool isEmptyField(ASTContext &Context, const FieldDecl *FD, bool AllowArrays);
+/// is an unnamed bit-field or an (array of) empty record(s). If
+/// AsIfNoUniqueAddr is true, then C++ record fields are considered empty if
+/// the [[no_unique_address]] attribute would have made them empty.
+bool isEmptyField(ASTContext &Context, const FieldDecl *FD, bool AllowArrays,
+ bool AsIfNoUniqueAddr = false);
/// isEmptyRecord - Return true iff a structure contains only empty
/// fields. Note that a structure with a flexible array member is not
-/// considered empty.
-bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
+/// considered empty. If AsIfNoUniqueAddr is true, then C++ record fields are
+/// considered empty if the [[no_unique_address]] attribute would have made
+/// them empty.
+bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays,
+ bool AsIfNoUniqueAddr = false);
/// isSingleElementStruct - Determine if a structure is a "single
/// element struct", i.e. it has exactly one non-empty field or
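[Aside, not part of the patch: a minimal sketch of what the new AsIfNoUniqueAddr flag means for ABI classification. The struct names and the Ctx/field handles below are hypothetical illustrations, not code from this commit.]

struct Tag {};                                    // an empty C++ record
struct S { Tag t; int x; };                       // t still occupies storage, so it is not an "empty" field
struct T { [[no_unique_address]] Tag t; int x; }; // t is empty for ABI purposes

// With the default arguments only T::t counts as empty:
//   isEmptyField(Ctx, FieldS_t, /*AllowArrays=*/true)  -> false
//   isEmptyField(Ctx, FieldT_t, /*AllowArrays=*/true)  -> true
// Passing the new flag classifies S::t as if it carried the attribute too:
//   isEmptyField(Ctx, FieldS_t, /*AllowArrays=*/true, /*AsIfNoUniqueAddr=*/true) -> true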
diff --git a/clang/lib/CodeGen/BackendConsumer.h b/clang/lib/CodeGen/BackendConsumer.h
new file mode 100644
index 000000000000..72a814cd43d7
--- /dev/null
+++ b/clang/lib/CodeGen/BackendConsumer.h
@@ -0,0 +1,166 @@
+//===--- BackendConsumer.h - LLVM BackendConsumer Header File -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_CODEGEN_BACKENDCONSUMER_H
+#define LLVM_CLANG_LIB_CODEGEN_BACKENDCONSUMER_H
+
+#include "clang/CodeGen/BackendUtil.h"
+#include "clang/CodeGen/CodeGenAction.h"
+
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Support/Timer.h"
+
+namespace llvm {
+ class DiagnosticInfoDontCall;
+}
+
+namespace clang {
+class ASTContext;
+class CodeGenAction;
+class CoverageSourceInfo;
+
+class BackendConsumer : public ASTConsumer {
+ using LinkModule = CodeGenAction::LinkModule;
+
+ virtual void anchor();
+ DiagnosticsEngine &Diags;
+ BackendAction Action;
+ const HeaderSearchOptions &HeaderSearchOpts;
+ const CodeGenOptions &CodeGenOpts;
+ const TargetOptions &TargetOpts;
+ const LangOptions &LangOpts;
+ std::unique_ptr<raw_pwrite_stream> AsmOutStream;
+ ASTContext *Context;
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;
+
+ llvm::Timer LLVMIRGeneration;
+ unsigned LLVMIRGenerationRefCount;
+
+ /// True if we've finished generating IR. This prevents us from generating
+ /// additional LLVM IR after emitting output in HandleTranslationUnit. This
+ /// can happen when Clang plugins trigger additional AST deserialization.
+ bool IRGenFinished = false;
+
+ bool TimerIsEnabled = false;
+
+ std::unique_ptr<CodeGenerator> Gen;
+
+ SmallVector<LinkModule, 4> LinkModules;
+
+ // A map from mangled names to their function's source location, used for
+ // backend diagnostics as the Clang AST may be unavailable. We actually use
+ // the mangled name's hash as the key because mangled names can be very
+ // long and take up lots of space. Using a hash can cause name collision,
+ // but that is rare and the consequences are pointing to a wrong source
+ // location which is not severe. This is a vector instead of an actual map
+ // because we optimize for time building this map rather than time
+ // retrieving an entry, as backend diagnostics are uncommon.
+ std::vector<std::pair<llvm::hash_code, FullSourceLoc>>
+ ManglingFullSourceLocs;
+
+
+ // This is here so that the diagnostic printer knows the module a diagnostic
+ // refers to.
+ llvm::Module *CurLinkModule = nullptr;
+
+public:
+ BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
+ const HeaderSearchOptions &HeaderSearchOpts,
+ const PreprocessorOptions &PPOpts,
+ const CodeGenOptions &CodeGenOpts,
+ const TargetOptions &TargetOpts,
+ const LangOptions &LangOpts, const std::string &InFile,
+ SmallVector<LinkModule, 4> LinkModules,
+ std::unique_ptr<raw_pwrite_stream> OS, llvm::LLVMContext &C,
+ CoverageSourceInfo *CoverageInfo = nullptr);
+
+ // This constructor is used in installing an empty BackendConsumer
+ // to use the clang diagnostic handler for IR input files. It avoids
+ // initializing the OS field.
+ BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
+ const HeaderSearchOptions &HeaderSearchOpts,
+ const PreprocessorOptions &PPOpts,
+ const CodeGenOptions &CodeGenOpts,
+ const TargetOptions &TargetOpts,
+ const LangOptions &LangOpts, llvm::Module *Module,
+ SmallVector<LinkModule, 4> LinkModules, llvm::LLVMContext &C,
+ CoverageSourceInfo *CoverageInfo = nullptr);
+
+ llvm::Module *getModule() const;
+ std::unique_ptr<llvm::Module> takeModule();
+
+ CodeGenerator *getCodeGenerator();
+
+ void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override;
+ void Initialize(ASTContext &Ctx) override;
+ bool HandleTopLevelDecl(DeclGroupRef D) override;
+ void HandleInlineFunctionDefinition(FunctionDecl *D) override;
+ void HandleInterestingDecl(DeclGroupRef D) override;
+ void HandleTranslationUnit(ASTContext &C) override;
+ void HandleTagDeclDefinition(TagDecl *D) override;
+ void HandleTagDeclRequiredDefinition(const TagDecl *D) override;
+ void CompleteTentativeDefinition(VarDecl *D) override;
+ void CompleteExternalDeclaration(VarDecl *D) override;
+ void AssignInheritanceModel(CXXRecordDecl *RD) override;
+ void HandleVTable(CXXRecordDecl *RD) override;
+
+
+ // Links each entry in LinkModules into our module. Returns true on error.
+ bool LinkInModules(llvm::Module *M, bool ShouldLinkFiles = true);
+
+ /// Get the best possible source location to represent a diagnostic that
+ /// may have associated debug info.
+ const FullSourceLoc getBestLocationFromDebugLoc(
+ const llvm::DiagnosticInfoWithLocationBase &D,
+ bool &BadDebugInfo, StringRef &Filename,
+ unsigned &Line, unsigned &Column) const;
+
+ std::optional<FullSourceLoc> getFunctionSourceLocation(
+ const llvm::Function &F) const;
+
+ void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI);
+ /// Specialized handler for InlineAsm diagnostic.
+ /// \return True if the diagnostic has been successfully reported, false
+ /// otherwise.
+ bool InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D);
+ /// Specialized handler for diagnostics reported using SMDiagnostic.
+ void SrcMgrDiagHandler(const llvm::DiagnosticInfoSrcMgr &D);
+ /// Specialized handler for StackSize diagnostic.
+ /// \return True if the diagnostic has been successfully reported, false
+ /// otherwise.
+ bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D);
+ /// Specialized handler for ResourceLimit diagnostic.
+ /// \return True if the diagnostic has been successfully reported, false
+ /// otherwise.
+ bool ResourceLimitDiagHandler(const llvm::DiagnosticInfoResourceLimit &D);
+
+ /// Specialized handler for unsupported backend feature diagnostic.
+ void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D);
+ /// Specialized handlers for optimization remarks.
+ /// Note that these handlers only accept remarks and they always handle
+ /// them.
+ void EmitOptimizationMessage(const llvm::DiagnosticInfoOptimizationBase &D,
+ unsigned DiagID);
+ void
+ OptimizationRemarkHandler(const llvm::DiagnosticInfoOptimizationBase &D);
+ void OptimizationRemarkHandler(
+ const llvm::OptimizationRemarkAnalysisFPCommute &D);
+ void OptimizationRemarkHandler(
+ const llvm::OptimizationRemarkAnalysisAliasing &D);
+ void OptimizationFailureHandler(
+ const llvm::DiagnosticInfoOptimizationFailure &D);
+ void DontCallDiagHandler(const llvm::DiagnosticInfoDontCall &D);
+ /// Specialized handler for misexpect warnings.
+ /// Note that misexpect remarks are emitted through ORE
+ void MisExpectDiagHandler(const llvm::DiagnosticInfoMisExpect &D);
+};
+
+} // namespace clang
+#endif
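[Aside, not part of the patch: a minimal sketch, under assumed illustrative names, of the flat-vector lookup that getFunctionSourceLocation performs over ManglingFullSourceLocs, as the comment in the class above describes.]

#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/IR/Function.h"
#include <optional>
#include <utility>

// Hypothetical free-function version of the lookup; the real code is a
// BackendConsumer member.
std::optional<clang::FullSourceLoc> lookupFunctionLocation(
    llvm::ArrayRef<std::pair<llvm::hash_code, clang::FullSourceLoc>> Locs,
    const llvm::Function &F) {
  // Hash the mangled name exactly as the table builder did, then scan the
  // append-only vector; backend diagnostics are rare, so a linear probe is
  // cheaper overall than maintaining a real map keyed by long mangled names.
  llvm::hash_code Key = llvm::hash_value(F.getName());
  for (const auto &[Hash, Loc] : Locs)
    if (Hash == Key)
      return Loc; // a collision can only point a diagnostic at the wrong line
  return std::nullopt;
}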
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index cda03d69522d..8c666e2cb463 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "clang/CodeGen/BackendUtil.h"
+#include "BackendConsumer.h"
+#include "LinkInModulesPass.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
@@ -27,6 +29,7 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Frontend/Driver/CodeGenOptions.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -55,6 +58,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
@@ -70,6 +74,7 @@
#include "llvm/Transforms/Instrumentation/KCFI.h"
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
@@ -90,19 +95,24 @@ using namespace llvm;
#include "llvm/Support/Extension.def"
namespace llvm {
-extern cl::opt<bool> DebugInfoCorrelate;
+extern cl::opt<bool> PrintPipelinePasses;
// Experiment to move sanitizers earlier.
static cl::opt<bool> ClSanitizeOnOptimizerEarlyEP(
"sanitizer-early-opt-ep", cl::Optional,
cl::desc("Insert sanitizers on OptimizerEarlyEP."), cl::init(false));
+
+// Re-link builtin bitcodes after optimization
+cl::opt<bool> ClRelinkBuiltinBitcodePostop(
+ "relink-builtin-bitcode-postop", cl::Optional,
+ cl::desc("Re-link builtin bitcodes after optimization."), cl::init(false));
}
namespace {
// Default filename used for profile generation.
std::string getDefaultProfileGenName() {
- return DebugInfoCorrelate ? "default_%p.proflite" : "default_%m.profraw";
+ return DebugInfoCorrelate ? "default_%m.proflite" : "default_%m.profraw";
}
class EmitAssemblyHelper {
@@ -111,7 +121,7 @@ class EmitAssemblyHelper {
const CodeGenOptions &CodeGenOpts;
const clang::TargetOptions &TargetOpts;
const LangOptions &LangOpts;
- Module *TheModule;
+ llvm::Module *TheModule;
IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS;
Timer CodeGenerationTime;
@@ -154,10 +164,9 @@ class EmitAssemblyHelper {
return F;
}
- void
- RunOptimizationPipeline(BackendAction Action,
- std::unique_ptr<raw_pwrite_stream> &OS,
- std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS);
+ void RunOptimizationPipeline(
+ BackendAction Action, std::unique_ptr<raw_pwrite_stream> &OS,
+ std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS, BackendConsumer *BC);
void RunCodegenPipeline(BackendAction Action,
std::unique_ptr<raw_pwrite_stream> &OS,
std::unique_ptr<llvm::ToolOutputFile> &DwoOS);
@@ -177,7 +186,7 @@ public:
const HeaderSearchOptions &HeaderSearchOpts,
const CodeGenOptions &CGOpts,
const clang::TargetOptions &TOpts,
- const LangOptions &LOpts, Module *M,
+ const LangOptions &LOpts, llvm::Module *M,
IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS)
: Diags(_Diags), HSOpts(HeaderSearchOpts), CodeGenOpts(CGOpts),
TargetOpts(TOpts), LangOpts(LOpts), TheModule(M), VFS(std::move(VFS)),
@@ -192,8 +201,8 @@ public:
std::unique_ptr<TargetMachine> TM;
// Emit output using the new pass manager for the optimization pipeline.
- void EmitAssembly(BackendAction Action,
- std::unique_ptr<raw_pwrite_stream> OS);
+ void EmitAssembly(BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS,
+ BackendConsumer *BC);
};
}
@@ -256,45 +265,6 @@ static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) {
return false;
}
-static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
- const CodeGenOptions &CodeGenOpts) {
- TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple);
-
- switch (CodeGenOpts.getVecLib()) {
- case CodeGenOptions::Accelerate:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate,
- TargetTriple);
- break;
- case CodeGenOptions::LIBMVEC:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC_X86,
- TargetTriple);
- break;
- case CodeGenOptions::MASSV:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV,
- TargetTriple);
- break;
- case CodeGenOptions::SVML:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML,
- TargetTriple);
- break;
- case CodeGenOptions::SLEEF:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SLEEFGNUABI,
- TargetTriple);
- break;
- case CodeGenOptions::Darwin_libsystem_m:
- TLII->addVectorizableFunctionsFromVecLib(
- TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple);
- break;
- case CodeGenOptions::ArmPL:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::ArmPL,
- TargetTriple);
- break;
- default:
- break;
- }
- return TLII;
-}
-
static std::optional<llvm::CodeModel::Model>
getCodeModel(const CodeGenOptions &CodeGenOpts) {
unsigned CodeModel = llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
@@ -313,12 +283,12 @@ getCodeModel(const CodeGenOptions &CodeGenOpts) {
static CodeGenFileType getCodeGenFileType(BackendAction Action) {
if (Action == Backend_EmitObj)
- return CGFT_ObjectFile;
+ return CodeGenFileType::ObjectFile;
else if (Action == Backend_EmitMCNull)
- return CGFT_Null;
+ return CodeGenFileType::Null;
else {
assert(Action == Backend_EmitAssembly && "Invalid action!");
- return CGFT_AssemblyFile;
+ return CodeGenFileType::AssemblyFile;
}
}
@@ -486,6 +456,8 @@ static bool initTargetOptions(DiagnosticsEngine &Diags,
Options.MCOptions.Argv0 = CodeGenOpts.Argv0;
Options.MCOptions.CommandLineArgs = CodeGenOpts.CommandLineArgs;
Options.MCOptions.AsSecureLogFile = CodeGenOpts.AsSecureLogFile;
+ Options.MCOptions.PPCUseFullRegisterNames =
+ CodeGenOpts.PPCUseFullRegisterNames;
Options.MisExpect = CodeGenOpts.MisExpect;
return true;
@@ -560,10 +532,10 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
std::string FeaturesStr =
llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
llvm::Reloc::Model RM = CodeGenOpts.RelocationModel;
- std::optional<CodeGenOpt::Level> OptLevelOrNone =
+ std::optional<CodeGenOptLevel> OptLevelOrNone =
CodeGenOpt::getLevel(CodeGenOpts.OptimizationLevel);
assert(OptLevelOrNone && "Invalid optimization level!");
- CodeGenOpt::Level OptLevel = *OptLevelOrNone;
+ CodeGenOptLevel OptLevel = *OptLevelOrNone;
llvm::TargetOptions Options;
if (!initTargetOptions(Diags, Options, CodeGenOpts, TargetOpts, LangOpts,
@@ -571,6 +543,7 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
return;
TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr,
Options, RM, CM, OptLevel));
+ TM->setLargeDataThreshold(CodeGenOpts.LargeDataThreshold);
}
bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses,
@@ -579,7 +552,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses,
raw_pwrite_stream *DwoOS) {
// Add LibraryInfo.
std::unique_ptr<TargetLibraryInfoImpl> TLII(
- createTLII(TargetTriple, CodeGenOpts));
+ llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib()));
CodeGenPasses.add(new TargetLibraryInfoWrapperPass(*TLII));
// Normal mode, emit a .s or .o file by running the code generator. Note,
@@ -688,7 +661,7 @@ static void addSanitizers(const Triple &TargetTriple,
// the logic of the original code, but operates on "shadow" values. It
// can benefit from re-running some general purpose optimization
// passes.
- MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+ MPM.addPass(RequireAnalysisPass<GlobalsAA, llvm::Module>());
FunctionPassManager FPM;
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
FPM.addPass(InstCombinePass());
@@ -747,7 +720,7 @@ static void addSanitizers(const Triple &TargetTriple,
SanitizersCallback(NewMPM, Level);
if (!NewMPM.isEmpty()) {
// Sanitizers can abandon<GlobalsAA>.
- NewMPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+ NewMPM.addPass(RequireAnalysisPass<GlobalsAA, llvm::Module>());
MPM.addPass(std::move(NewMPM));
}
});
@@ -759,7 +732,7 @@ static void addSanitizers(const Triple &TargetTriple,
void EmitAssemblyHelper::RunOptimizationPipeline(
BackendAction Action, std::unique_ptr<raw_pwrite_stream> &OS,
- std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS) {
+ std::unique_ptr<llvm::ToolOutputFile> &ThinLinkOS, BackendConsumer *BC) {
std::optional<PGOOptions> PGOOpt;
if (CodeGenOpts.hasProfileIRInstr())
@@ -768,7 +741,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
CodeGenOpts.InstrProfileOutput.empty() ? getDefaultProfileGenName()
: CodeGenOpts.InstrProfileOutput,
"", "", CodeGenOpts.MemoryProfileUsePath, nullptr, PGOOptions::IRInstr,
- PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling);
+ PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling,
+ /*PseudoProbeForProfiling=*/false, CodeGenOpts.AtomicProfileUpdate);
else if (CodeGenOpts.hasProfileIRUse()) {
// -fprofile-use.
auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? PGOOptions::CSIRUse
@@ -902,6 +876,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
<< PluginFN << toString(PassPlugin.takeError());
}
}
+ for (auto PassCallback : CodeGenOpts.PassBuilderCallbacks)
+ PassCallback(PB);
#define HANDLE_EXTENSION(Ext) \
get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB);
#include "llvm/Support/Extension.def"
@@ -909,7 +885,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
// Register the target library analysis directly and give it a customized
// preset TLI.
std::unique_ptr<TargetLibraryInfoImpl> TLII(
- createTLII(TargetTriple, CodeGenOpts));
+ llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib()));
FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
// Register all the basic analyses with the managers.
@@ -920,14 +896,17 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
ModulePassManager MPM;
+ // Add a verifier pass, before any other passes, to catch CodeGen issues.
+ if (CodeGenOpts.VerifyModule)
+ MPM.addPass(VerifierPass());
if (!CodeGenOpts.DisableLLVMPasses) {
// Map our optimization levels into one of the distinct levels used to
// configure the pipeline.
OptimizationLevel Level = mapToLevel(CodeGenOpts);
- bool IsThinLTO = CodeGenOpts.PrepareForThinLTO;
- bool IsLTO = CodeGenOpts.PrepareForLTO;
+ const bool PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO;
+ const bool PrepareForLTO = CodeGenOpts.PrepareForLTO;
if (LangOpts.ObjCAutoRefCount) {
PB.registerPipelineStartEPCallback(
@@ -1016,30 +995,37 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
});
}
- bool IsThinOrUnifiedLTO = IsThinLTO || (IsLTO && CodeGenOpts.UnifiedLTO);
if (CodeGenOpts.FatLTO) {
- MPM = PB.buildFatLTODefaultPipeline(Level, IsThinOrUnifiedLTO,
- IsThinOrUnifiedLTO ||
- shouldEmitRegularLTOSummary());
- } else if (IsThinOrUnifiedLTO) {
- MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level);
- } else if (IsLTO) {
- MPM = PB.buildLTOPreLinkDefaultPipeline(Level);
+ assert(CodeGenOpts.UnifiedLTO && "FatLTO requires UnifiedLTO");
+ MPM.addPass(PB.buildFatLTODefaultPipeline(Level));
+ } else if (PrepareForThinLTO) {
+ MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(Level));
+ } else if (PrepareForLTO) {
+ MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(Level));
} else {
- MPM = PB.buildPerModuleDefaultPipeline(Level);
+ MPM.addPass(PB.buildPerModuleDefaultPipeline(Level));
}
}
+ // Re-link against any bitcodes supplied via the -mlink-builtin-bitcode option
+ // Some optimizations may generate new function calls that would not have
+ // been linked pre-optimization (i.e. fused sincos calls generated by
+ // AMDGPULibCalls::fold_sincos.)
+ if (ClRelinkBuiltinBitcodePostop)
+ MPM.addPass(LinkInModulesPass(BC, false));
+
// Add a verifier pass if requested. We don't have to do this if the action
// requires code generation because there will already be a verifier pass in
// the code-generation pipeline.
+ // Since we already added a verifier pass above, this
+ // might even not run the analysis, if previous passes caused no changes.
if (!actionRequiresCodeGen(Action) && CodeGenOpts.VerifyModule)
MPM.addPass(VerifierPass());
if (Action == Backend_EmitBC || Action == Backend_EmitLL) {
if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) {
if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
- TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
CodeGenOpts.EnableSplitLTOUnit);
if (Action == Backend_EmitBC) {
if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
@@ -1048,26 +1034,25 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
return;
}
if (CodeGenOpts.UnifiedLTO)
- TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1));
+ TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
MPM.addPass(ThinLTOBitcodeWriterPass(
*OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr));
} else {
MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists,
/*EmitLTOSummary=*/true));
}
-
} else {
// Emit a module summary by default for Regular LTO except for ld64
// targets
bool EmitLTOSummary = shouldEmitRegularLTOSummary();
if (EmitLTOSummary) {
if (!TheModule->getModuleFlag("ThinLTO") && !CodeGenOpts.UnifiedLTO)
- TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+ TheModule->addModuleFlag(llvm::Module::Error, "ThinLTO", uint32_t(0));
if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
- TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
uint32_t(1));
if (CodeGenOpts.UnifiedLTO)
- TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1));
+ TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
}
if (Action == Backend_EmitBC)
MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
@@ -1080,19 +1065,32 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (CodeGenOpts.FatLTO) {
// Set module flags, like EnableSplitLTOUnit and UnifiedLTO, since FatLTO
// uses a different action than Backend_EmitBC or Backend_EmitLL.
- bool IsThinOrUnifiedLTO =
- CodeGenOpts.PrepareForThinLTO ||
- (CodeGenOpts.PrepareForLTO && CodeGenOpts.UnifiedLTO);
if (!TheModule->getModuleFlag("ThinLTO"))
- TheModule->addModuleFlag(Module::Error, "ThinLTO",
- uint32_t(IsThinOrUnifiedLTO));
+ TheModule->addModuleFlag(llvm::Module::Error, "ThinLTO",
+ uint32_t(CodeGenOpts.PrepareForThinLTO));
if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
- TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ TheModule->addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit",
uint32_t(CodeGenOpts.EnableSplitLTOUnit));
- if (CodeGenOpts.UnifiedLTO && !TheModule->getModuleFlag("UnifiedLTO"))
- TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1));
+ // FatLTO always means UnifiedLTO
+ if (!TheModule->getModuleFlag("UnifiedLTO"))
+ TheModule->addModuleFlag(llvm::Module::Error, "UnifiedLTO", uint32_t(1));
}
+ // Print a textual, '-passes=' compatible, representation of pipeline if
+ // requested.
+ if (PrintPipelinePasses) {
+ MPM.printPipeline(outs(), [&PIC](StringRef ClassName) {
+ auto PassName = PIC.getPassNameForClassName(ClassName);
+ return PassName.empty() ? ClassName : PassName;
+ });
+ outs() << "\n";
+ return;
+ }
+
+ if (LangOpts.HIPStdPar && !LangOpts.CUDAIsDevice &&
+ LangOpts.HIPStdParInterposeAlloc)
+ MPM.addPass(HipStdParAllocationInterpositionPass());
+
// Now that we have all of the passes ready, run them.
{
PrettyStackTraceString CrashInfo("Optimizer");
@@ -1130,6 +1128,13 @@ void EmitAssemblyHelper::RunCodegenPipeline(
return;
}
+ // If -print-pipeline-passes is requested, don't run the legacy pass manager.
+ // FIXME: when codegen is switched to use the new pass manager, it should also
+ // emit pass names here.
+ if (PrintPipelinePasses) {
+ return;
+ }
+
{
PrettyStackTraceString CrashInfo("Code generation");
llvm::TimeTraceScope TimeScope("CodeGenPasses");
@@ -1138,7 +1143,8 @@ void EmitAssemblyHelper::RunCodegenPipeline(
}
void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
- std::unique_ptr<raw_pwrite_stream> OS) {
+ std::unique_ptr<raw_pwrite_stream> OS,
+ BackendConsumer *BC) {
TimeRegion Region(CodeGenOpts.TimePasses ? &CodeGenerationTime : nullptr);
setCommandLineOpts(CodeGenOpts);
@@ -1154,7 +1160,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
cl::PrintOptionValues();
std::unique_ptr<llvm::ToolOutputFile> ThinLinkOS, DwoOS;
- RunOptimizationPipeline(Action, OS, ThinLinkOS);
+ RunOptimizationPipeline(Action, OS, ThinLinkOS, BC);
RunCodegenPipeline(Action, OS, DwoOS);
if (ThinLinkOS)
@@ -1164,12 +1170,13 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
}
static void runThinLTOBackend(
- DiagnosticsEngine &Diags, ModuleSummaryIndex *CombinedIndex, Module *M,
- const HeaderSearchOptions &HeaderOpts, const CodeGenOptions &CGOpts,
- const clang::TargetOptions &TOpts, const LangOptions &LOpts,
- std::unique_ptr<raw_pwrite_stream> OS, std::string SampleProfile,
- std::string ProfileRemapping, BackendAction Action) {
- StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>>
+ DiagnosticsEngine &Diags, ModuleSummaryIndex *CombinedIndex,
+ llvm::Module *M, const HeaderSearchOptions &HeaderOpts,
+ const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts,
+ const LangOptions &LOpts, std::unique_ptr<raw_pwrite_stream> OS,
+ std::string SampleProfile, std::string ProfileRemapping,
+ BackendAction Action) {
+ DenseMap<StringRef, DenseMap<GlobalValue::GUID, GlobalValueSummary *>>
ModuleToDefinedGVSummaries;
CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
@@ -1200,7 +1207,7 @@ static void runThinLTOBackend(
Conf.CodeModel = getCodeModel(CGOpts);
Conf.MAttrs = TOpts.Features;
Conf.RelocModel = CGOpts.RelocationModel;
- std::optional<CodeGenOpt::Level> OptLevelOrNone =
+ std::optional<CodeGenOptLevel> OptLevelOrNone =
CodeGenOpt::getLevel(CGOpts.OptimizationLevel);
assert(OptLevelOrNone && "Invalid optimization level!");
Conf.CGOptLevel = *OptLevelOrNone;
@@ -1237,18 +1244,18 @@ static void runThinLTOBackend(
Conf.SplitDwarfOutput = CGOpts.SplitDwarfOutput;
switch (Action) {
case Backend_EmitNothing:
- Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) {
+ Conf.PreCodeGenModuleHook = [](size_t Task, const llvm::Module &Mod) {
return false;
};
break;
case Backend_EmitLL:
- Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) {
+ Conf.PreCodeGenModuleHook = [&](size_t Task, const llvm::Module &Mod) {
M->print(*OS, nullptr, CGOpts.EmitLLVMUseLists);
return false;
};
break;
case Backend_EmitBC:
- Conf.PreCodeGenModuleHook = [&](size_t Task, const Module &Mod) {
+ Conf.PreCodeGenModuleHook = [&](size_t Task, const llvm::Module &Mod) {
WriteBitcodeToFile(*M, *OS, CGOpts.EmitLLVMUseLists);
return false;
};
@@ -1267,14 +1274,12 @@ static void runThinLTOBackend(
}
}
-void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
- const HeaderSearchOptions &HeaderOpts,
- const CodeGenOptions &CGOpts,
- const clang::TargetOptions &TOpts,
- const LangOptions &LOpts, StringRef TDesc,
- Module *M, BackendAction Action,
- IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
- std::unique_ptr<raw_pwrite_stream> OS) {
+void clang::EmitBackendOutput(
+ DiagnosticsEngine &Diags, const HeaderSearchOptions &HeaderOpts,
+ const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts,
+ const LangOptions &LOpts, StringRef TDesc, llvm::Module *M,
+ BackendAction Action, IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
+ std::unique_ptr<raw_pwrite_stream> OS, BackendConsumer *BC) {
llvm::TimeTraceScope TimeScope("Backend");
@@ -1317,7 +1322,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
}
EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M, VFS);
- AsmHelper.EmitAssembly(Action, std::move(OS));
+ AsmHelper.EmitAssembly(Action, std::move(OS), BC);
// Verify clang's TargetInfo DataLayout against the LLVM TargetMachine's
// DataLayout.
@@ -1352,7 +1357,7 @@ void clang::EmbedObject(llvm::Module *M, const CodeGenOptions &CGOpts,
for (StringRef OffloadObject : CGOpts.OffloadObjects) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ObjectOrErr =
llvm::MemoryBuffer::getFileOrSTDIN(OffloadObject);
- if (std::error_code EC = ObjectOrErr.getError()) {
+ if (ObjectOrErr.getError()) {
auto DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"could not open '%0' for embedding");
Diags.Report(DiagID) << OffloadObject;
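[Aside, not part of the patch: for orientation, a minimal standalone sketch of the pipeline shape RunOptimizationPipeline now builds with the new pass manager: a verifier before everything else, then one of the LTO pre-link or per-module default pipelines. The function name and the fixed O2 level are assumptions for the sketch.]

#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Passes/PassBuilder.h"

void runDefaultPipeline(llvm::Module &M, bool PrepareForThinLTO,
                        bool PrepareForLTO) {
  llvm::LoopAnalysisManager LAM;
  llvm::FunctionAnalysisManager FAM;
  llvm::CGSCCAnalysisManager CGAM;
  llvm::ModuleAnalysisManager MAM;
  llvm::PassBuilder PB;

  // Register the analyses and wire the proxies, mirroring the patch around
  // PB.crossRegisterProxies(LAM, FAM, CGAM, MAM).
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  llvm::ModulePassManager MPM;
  // Verify the incoming IR before any other pass, to catch CodeGen issues.
  MPM.addPass(llvm::VerifierPass());

  llvm::OptimizationLevel Level = llvm::OptimizationLevel::O2;
  if (PrepareForThinLTO)
    MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(Level));
  else if (PrepareForLTO)
    MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(Level));
  else
    MPM.addPass(PB.buildPerModuleDefaultPipeline(Level));

  MPM.run(M, MAM);
}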
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 222b0a192c85..52e6ddb7d6af 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -87,8 +87,7 @@ namespace {
llvm::Value *StoragePtr = CGF.Builder.CreateConstGEP1_64(
CGF.Int8Ty, BitFieldPtr, OffsetInChars.getQuantity());
StoragePtr = CGF.Builder.CreateAddrSpaceCast(
- StoragePtr, llvm::PointerType::getUnqual(CGF.getLLVMContext()),
- "atomic_bitfield_base");
+ StoragePtr, CGF.UnqualPtrTy, "atomic_bitfield_base");
BFI = OrigBFI;
BFI.Offset = Offset;
BFI.StorageSize = AtomicSizeInBits;
@@ -102,9 +101,9 @@ namespace {
llvm::APInt Size(
/*numBits=*/32,
C.toCharUnitsFromBits(AtomicSizeInBits).getQuantity());
- AtomicTy =
- C.getConstantArrayType(C.CharTy, Size, nullptr, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ AtomicTy = C.getConstantArrayType(C.CharTy, Size, nullptr,
+ ArraySizeModifier::Normal,
+ /*IndexTypeQuals=*/0);
}
AtomicAlign = ValueAlign = lvalue.getAlignment();
} else if (lvalue.isVectorElt()) {
@@ -384,8 +383,7 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
llvm::Value *Desired = CGF.Builder.CreateLoad(Val2);
llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg(
- Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder,
- Scope);
+ Ptr, Expected, Desired, SuccessOrder, FailureOrder, Scope);
Pair->setVolatile(E->isVolatile());
Pair->setWeak(IsWeak);
@@ -509,9 +507,11 @@ static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder,
default:
llvm_unreachable("Unexpected min/max operation");
case AtomicExpr::AO__atomic_max_fetch:
+ case AtomicExpr::AO__scoped_atomic_max_fetch:
Pred = IsSigned ? llvm::CmpInst::ICMP_SGT : llvm::CmpInst::ICMP_UGT;
break;
case AtomicExpr::AO__atomic_min_fetch:
+ case AtomicExpr::AO__scoped_atomic_min_fetch:
Pred = IsSigned ? llvm::CmpInst::ICMP_SLT : llvm::CmpInst::ICMP_ULT;
break;
}
@@ -546,7 +546,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
FailureOrder, Size, Order, Scope);
return;
case AtomicExpr::AO__atomic_compare_exchange:
- case AtomicExpr::AO__atomic_compare_exchange_n: {
+ case AtomicExpr::AO__atomic_compare_exchange_n:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange_n: {
if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
Val1, Val2, FailureOrder, Size, Order, Scope);
@@ -579,7 +581,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__opencl_atomic_load:
case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
- case AtomicExpr::AO__atomic_load: {
+ case AtomicExpr::AO__atomic_load:
+ case AtomicExpr::AO__scoped_atomic_load_n:
+ case AtomicExpr::AO__scoped_atomic_load: {
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
Load->setAtomic(Order, Scope);
Load->setVolatile(E->isVolatile());
@@ -591,7 +595,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__opencl_atomic_store:
case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__atomic_store:
- case AtomicExpr::AO__atomic_store_n: {
+ case AtomicExpr::AO__atomic_store_n:
+ case AtomicExpr::AO__scoped_atomic_store:
+ case AtomicExpr::AO__scoped_atomic_store_n: {
llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
Store->setAtomic(Order, Scope);
@@ -604,10 +610,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__opencl_atomic_exchange:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__atomic_exchange:
+ case AtomicExpr::AO__scoped_atomic_exchange_n:
+ case AtomicExpr::AO__scoped_atomic_exchange:
Op = llvm::AtomicRMWInst::Xchg;
break;
case AtomicExpr::AO__atomic_add_fetch:
+ case AtomicExpr::AO__scoped_atomic_add_fetch:
PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FAdd
: llvm::Instruction::Add;
[[fallthrough]];
@@ -615,11 +624,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__hip_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
+ case AtomicExpr::AO__scoped_atomic_fetch_add:
Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd
: llvm::AtomicRMWInst::Add;
break;
case AtomicExpr::AO__atomic_sub_fetch:
+ case AtomicExpr::AO__scoped_atomic_sub_fetch:
PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FSub
: llvm::Instruction::Sub;
[[fallthrough]];
@@ -627,17 +638,20 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__hip_atomic_fetch_sub:
case AtomicExpr::AO__opencl_atomic_fetch_sub:
case AtomicExpr::AO__atomic_fetch_sub:
+ case AtomicExpr::AO__scoped_atomic_fetch_sub:
Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FSub
: llvm::AtomicRMWInst::Sub;
break;
case AtomicExpr::AO__atomic_min_fetch:
+ case AtomicExpr::AO__scoped_atomic_min_fetch:
PostOpMinMax = true;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__hip_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_min:
case AtomicExpr::AO__atomic_fetch_min:
+ case AtomicExpr::AO__scoped_atomic_fetch_min:
Op = E->getValueType()->isFloatingType()
? llvm::AtomicRMWInst::FMin
: (E->getValueType()->isSignedIntegerType()
@@ -646,12 +660,14 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
break;
case AtomicExpr::AO__atomic_max_fetch:
+ case AtomicExpr::AO__scoped_atomic_max_fetch:
PostOpMinMax = true;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__hip_atomic_fetch_max:
case AtomicExpr::AO__opencl_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_max:
+ case AtomicExpr::AO__scoped_atomic_fetch_max:
Op = E->getValueType()->isFloatingType()
? llvm::AtomicRMWInst::FMax
: (E->getValueType()->isSignedIntegerType()
@@ -660,47 +676,55 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
break;
case AtomicExpr::AO__atomic_and_fetch:
+ case AtomicExpr::AO__scoped_atomic_and_fetch:
PostOp = llvm::Instruction::And;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__hip_atomic_fetch_and:
case AtomicExpr::AO__opencl_atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_and:
+ case AtomicExpr::AO__scoped_atomic_fetch_and:
Op = llvm::AtomicRMWInst::And;
break;
case AtomicExpr::AO__atomic_or_fetch:
+ case AtomicExpr::AO__scoped_atomic_or_fetch:
PostOp = llvm::Instruction::Or;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__hip_atomic_fetch_or:
case AtomicExpr::AO__opencl_atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_or:
+ case AtomicExpr::AO__scoped_atomic_fetch_or:
Op = llvm::AtomicRMWInst::Or;
break;
case AtomicExpr::AO__atomic_xor_fetch:
+ case AtomicExpr::AO__scoped_atomic_xor_fetch:
PostOp = llvm::Instruction::Xor;
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_xor:
case AtomicExpr::AO__hip_atomic_fetch_xor:
case AtomicExpr::AO__opencl_atomic_fetch_xor:
case AtomicExpr::AO__atomic_fetch_xor:
+ case AtomicExpr::AO__scoped_atomic_fetch_xor:
Op = llvm::AtomicRMWInst::Xor;
break;
case AtomicExpr::AO__atomic_nand_fetch:
+ case AtomicExpr::AO__scoped_atomic_nand_fetch:
PostOp = llvm::Instruction::And; // the NOT is special cased below
[[fallthrough]];
case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__atomic_fetch_nand:
+ case AtomicExpr::AO__scoped_atomic_fetch_nand:
Op = llvm::AtomicRMWInst::Nand;
break;
}
llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
llvm::AtomicRMWInst *RMWI =
- CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order, Scope);
+ CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order, Scope);
RMWI->setVolatile(E->isVolatile());
// For __atomic_*_fetch operations, perform the operation again to
@@ -713,7 +737,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
else if (PostOp)
Result = CGF.Builder.CreateBinOp((llvm::Instruction::BinaryOps)PostOp, RMWI,
LoadVal1);
- if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
+ if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_nand_fetch)
Result = CGF.Builder.CreateNot(Result);
CGF.Builder.CreateStore(Result, Dest);
}
@@ -862,41 +887,50 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__opencl_atomic_init:
llvm_unreachable("Already handled above with EmitAtomicInit!");
+ case AtomicExpr::AO__atomic_load_n:
+ case AtomicExpr::AO__scoped_atomic_load_n:
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__opencl_atomic_load:
case AtomicExpr::AO__hip_atomic_load:
- case AtomicExpr::AO__atomic_load_n:
break;
case AtomicExpr::AO__atomic_load:
+ case AtomicExpr::AO__scoped_atomic_load:
Dest = EmitPointerWithAlignment(E->getVal1());
break;
case AtomicExpr::AO__atomic_store:
+ case AtomicExpr::AO__scoped_atomic_store:
Val1 = EmitPointerWithAlignment(E->getVal1());
break;
case AtomicExpr::AO__atomic_exchange:
+ case AtomicExpr::AO__scoped_atomic_exchange:
Val1 = EmitPointerWithAlignment(E->getVal1());
Dest = EmitPointerWithAlignment(E->getVal2());
break;
- case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__atomic_compare_exchange:
+ case AtomicExpr::AO__atomic_compare_exchange_n:
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
- case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
- case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
- case AtomicExpr::AO__atomic_compare_exchange_n:
- case AtomicExpr::AO__atomic_compare_exchange:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange_n:
Val1 = EmitPointerWithAlignment(E->getVal1());
- if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
+ if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange)
Val2 = EmitPointerWithAlignment(E->getVal2());
else
Val2 = EmitValToTemp(*this, E->getVal2());
OrderFail = EmitScalarExpr(E->getOrderFail());
if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange_n ||
- E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
+ E->getOp() == AtomicExpr::AO__atomic_compare_exchange ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange_n ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_compare_exchange)
IsWeak = EmitScalarExpr(E->getWeak());
break;
@@ -936,35 +970,53 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__opencl_atomic_fetch_min:
case AtomicExpr::AO__hip_atomic_fetch_max:
case AtomicExpr::AO__hip_atomic_fetch_min:
+ case AtomicExpr::AO__scoped_atomic_fetch_add:
+ case AtomicExpr::AO__scoped_atomic_fetch_max:
+ case AtomicExpr::AO__scoped_atomic_fetch_min:
+ case AtomicExpr::AO__scoped_atomic_fetch_sub:
+ case AtomicExpr::AO__scoped_atomic_add_fetch:
+ case AtomicExpr::AO__scoped_atomic_max_fetch:
+ case AtomicExpr::AO__scoped_atomic_min_fetch:
+ case AtomicExpr::AO__scoped_atomic_sub_fetch:
ShouldCastToIntPtrTy = !MemTy->isFloatingType();
[[fallthrough]];
- case AtomicExpr::AO__c11_atomic_store:
- case AtomicExpr::AO__c11_atomic_exchange:
- case AtomicExpr::AO__opencl_atomic_store:
- case AtomicExpr::AO__hip_atomic_store:
- case AtomicExpr::AO__opencl_atomic_exchange:
- case AtomicExpr::AO__hip_atomic_exchange:
+ case AtomicExpr::AO__atomic_fetch_and:
+ case AtomicExpr::AO__atomic_fetch_nand:
+ case AtomicExpr::AO__atomic_fetch_or:
+ case AtomicExpr::AO__atomic_fetch_xor:
+ case AtomicExpr::AO__atomic_and_fetch:
+ case AtomicExpr::AO__atomic_nand_fetch:
+ case AtomicExpr::AO__atomic_or_fetch:
+ case AtomicExpr::AO__atomic_xor_fetch:
case AtomicExpr::AO__atomic_store_n:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__c11_atomic_fetch_and:
+ case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__c11_atomic_fetch_xor:
- case AtomicExpr::AO__c11_atomic_fetch_nand:
- case AtomicExpr::AO__opencl_atomic_fetch_and:
- case AtomicExpr::AO__opencl_atomic_fetch_or:
- case AtomicExpr::AO__opencl_atomic_fetch_xor:
- case AtomicExpr::AO__atomic_fetch_and:
+ case AtomicExpr::AO__c11_atomic_store:
+ case AtomicExpr::AO__c11_atomic_exchange:
case AtomicExpr::AO__hip_atomic_fetch_and:
- case AtomicExpr::AO__atomic_fetch_or:
case AtomicExpr::AO__hip_atomic_fetch_or:
- case AtomicExpr::AO__atomic_fetch_xor:
case AtomicExpr::AO__hip_atomic_fetch_xor:
- case AtomicExpr::AO__atomic_fetch_nand:
- case AtomicExpr::AO__atomic_and_fetch:
- case AtomicExpr::AO__atomic_or_fetch:
- case AtomicExpr::AO__atomic_xor_fetch:
- case AtomicExpr::AO__atomic_nand_fetch:
+ case AtomicExpr::AO__hip_atomic_store:
+ case AtomicExpr::AO__hip_atomic_exchange:
+ case AtomicExpr::AO__opencl_atomic_fetch_and:
+ case AtomicExpr::AO__opencl_atomic_fetch_or:
+ case AtomicExpr::AO__opencl_atomic_fetch_xor:
+ case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__opencl_atomic_exchange:
+ case AtomicExpr::AO__scoped_atomic_fetch_and:
+ case AtomicExpr::AO__scoped_atomic_fetch_nand:
+ case AtomicExpr::AO__scoped_atomic_fetch_or:
+ case AtomicExpr::AO__scoped_atomic_fetch_xor:
+ case AtomicExpr::AO__scoped_atomic_and_fetch:
+ case AtomicExpr::AO__scoped_atomic_nand_fetch:
+ case AtomicExpr::AO__scoped_atomic_or_fetch:
+ case AtomicExpr::AO__scoped_atomic_xor_fetch:
+ case AtomicExpr::AO__scoped_atomic_store_n:
+ case AtomicExpr::AO__scoped_atomic_exchange_n:
Val1 = EmitValToTemp(*this, E->getVal1());
break;
}
@@ -1003,44 +1055,60 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__opencl_atomic_init:
llvm_unreachable("Already handled above with EmitAtomicInit!");
- case AtomicExpr::AO__c11_atomic_fetch_add:
- case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
- case AtomicExpr::AO__hip_atomic_fetch_add:
- case AtomicExpr::AO__c11_atomic_fetch_and:
- case AtomicExpr::AO__opencl_atomic_fetch_and:
- case AtomicExpr::AO__hip_atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_and:
- case AtomicExpr::AO__c11_atomic_fetch_or:
- case AtomicExpr::AO__opencl_atomic_fetch_or:
- case AtomicExpr::AO__hip_atomic_fetch_or:
- case AtomicExpr::AO__atomic_fetch_or:
- case AtomicExpr::AO__c11_atomic_fetch_nand:
+ case AtomicExpr::AO__atomic_fetch_max:
+ case AtomicExpr::AO__atomic_fetch_min:
case AtomicExpr::AO__atomic_fetch_nand:
- case AtomicExpr::AO__c11_atomic_fetch_sub:
- case AtomicExpr::AO__opencl_atomic_fetch_sub:
+ case AtomicExpr::AO__atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_sub:
- case AtomicExpr::AO__hip_atomic_fetch_sub:
- case AtomicExpr::AO__c11_atomic_fetch_xor:
- case AtomicExpr::AO__opencl_atomic_fetch_xor:
- case AtomicExpr::AO__opencl_atomic_fetch_min:
- case AtomicExpr::AO__opencl_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_xor:
- case AtomicExpr::AO__hip_atomic_fetch_xor:
- case AtomicExpr::AO__c11_atomic_fetch_max:
- case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__atomic_add_fetch:
case AtomicExpr::AO__atomic_and_fetch:
+ case AtomicExpr::AO__atomic_max_fetch:
+ case AtomicExpr::AO__atomic_min_fetch:
case AtomicExpr::AO__atomic_nand_fetch:
case AtomicExpr::AO__atomic_or_fetch:
case AtomicExpr::AO__atomic_sub_fetch:
case AtomicExpr::AO__atomic_xor_fetch:
- case AtomicExpr::AO__atomic_fetch_max:
+ case AtomicExpr::AO__c11_atomic_fetch_add:
+ case AtomicExpr::AO__c11_atomic_fetch_and:
+ case AtomicExpr::AO__c11_atomic_fetch_max:
+ case AtomicExpr::AO__c11_atomic_fetch_min:
+ case AtomicExpr::AO__c11_atomic_fetch_nand:
+ case AtomicExpr::AO__c11_atomic_fetch_or:
+ case AtomicExpr::AO__c11_atomic_fetch_sub:
+ case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__hip_atomic_fetch_add:
+ case AtomicExpr::AO__hip_atomic_fetch_and:
case AtomicExpr::AO__hip_atomic_fetch_max:
- case AtomicExpr::AO__atomic_fetch_min:
case AtomicExpr::AO__hip_atomic_fetch_min:
- case AtomicExpr::AO__atomic_max_fetch:
- case AtomicExpr::AO__atomic_min_fetch:
+ case AtomicExpr::AO__hip_atomic_fetch_or:
+ case AtomicExpr::AO__hip_atomic_fetch_sub:
+ case AtomicExpr::AO__hip_atomic_fetch_xor:
+ case AtomicExpr::AO__opencl_atomic_fetch_add:
+ case AtomicExpr::AO__opencl_atomic_fetch_and:
+ case AtomicExpr::AO__opencl_atomic_fetch_max:
+ case AtomicExpr::AO__opencl_atomic_fetch_min:
+ case AtomicExpr::AO__opencl_atomic_fetch_or:
+ case AtomicExpr::AO__opencl_atomic_fetch_sub:
+ case AtomicExpr::AO__opencl_atomic_fetch_xor:
+ case AtomicExpr::AO__scoped_atomic_fetch_add:
+ case AtomicExpr::AO__scoped_atomic_fetch_and:
+ case AtomicExpr::AO__scoped_atomic_fetch_max:
+ case AtomicExpr::AO__scoped_atomic_fetch_min:
+ case AtomicExpr::AO__scoped_atomic_fetch_nand:
+ case AtomicExpr::AO__scoped_atomic_fetch_or:
+ case AtomicExpr::AO__scoped_atomic_fetch_sub:
+ case AtomicExpr::AO__scoped_atomic_fetch_xor:
+ case AtomicExpr::AO__scoped_atomic_add_fetch:
+ case AtomicExpr::AO__scoped_atomic_and_fetch:
+ case AtomicExpr::AO__scoped_atomic_max_fetch:
+ case AtomicExpr::AO__scoped_atomic_min_fetch:
+ case AtomicExpr::AO__scoped_atomic_nand_fetch:
+ case AtomicExpr::AO__scoped_atomic_or_fetch:
+ case AtomicExpr::AO__scoped_atomic_sub_fetch:
+ case AtomicExpr::AO__scoped_atomic_xor_fetch:
// For these, only library calls for certain sizes exist.
UseOptimizedLibcall = true;
break;
@@ -1049,30 +1117,38 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_exchange:
case AtomicExpr::AO__atomic_compare_exchange:
+ case AtomicExpr::AO__scoped_atomic_load:
+ case AtomicExpr::AO__scoped_atomic_store:
+ case AtomicExpr::AO__scoped_atomic_exchange:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange:
// Use the generic version if we don't know that the operand will be
// suitably aligned for the optimized version.
if (Misaligned)
break;
[[fallthrough]];
+ case AtomicExpr::AO__atomic_load_n:
+ case AtomicExpr::AO__atomic_store_n:
+ case AtomicExpr::AO__atomic_exchange_n:
+ case AtomicExpr::AO__atomic_compare_exchange_n:
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__c11_atomic_exchange:
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__hip_atomic_load:
+ case AtomicExpr::AO__hip_atomic_store:
+ case AtomicExpr::AO__hip_atomic_exchange:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_load:
- case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__opencl_atomic_store:
- case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__opencl_atomic_exchange:
- case AtomicExpr::AO__hip_atomic_exchange:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
- case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
- case AtomicExpr::AO__atomic_load_n:
- case AtomicExpr::AO__atomic_store_n:
- case AtomicExpr::AO__atomic_exchange_n:
- case AtomicExpr::AO__atomic_compare_exchange_n:
+ case AtomicExpr::AO__scoped_atomic_load_n:
+ case AtomicExpr::AO__scoped_atomic_store_n:
+ case AtomicExpr::AO__scoped_atomic_exchange_n:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange_n:
// Only use optimized library calls for sizes for which they exist.
// FIXME: Size == 16 optimized library functions exist too.
if (Size == 1 || Size == 2 || Size == 4 || Size == 8)
@@ -1125,14 +1201,16 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// void *desired, int success, int failure)
// bool __atomic_compare_exchange_N(T *mem, T *expected, T desired,
// int success, int failure)
+ case AtomicExpr::AO__atomic_compare_exchange:
+ case AtomicExpr::AO__atomic_compare_exchange_n:
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
- case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
- case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
- case AtomicExpr::AO__atomic_compare_exchange:
- case AtomicExpr::AO__atomic_compare_exchange_n:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange:
+ case AtomicExpr::AO__scoped_atomic_compare_exchange_n:
LibCallName = "__atomic_compare_exchange";
RetTy = getContext().BoolTy;
HaveRetTy = true;
@@ -1147,22 +1225,26 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// void __atomic_exchange(size_t size, void *mem, void *val, void *return,
// int order)
// T __atomic_exchange_N(T *mem, T val, int order)
- case AtomicExpr::AO__c11_atomic_exchange:
- case AtomicExpr::AO__opencl_atomic_exchange:
- case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__atomic_exchange:
+ case AtomicExpr::AO__atomic_exchange_n:
+ case AtomicExpr::AO__c11_atomic_exchange:
case AtomicExpr::AO__hip_atomic_exchange:
+ case AtomicExpr::AO__opencl_atomic_exchange:
+ case AtomicExpr::AO__scoped_atomic_exchange:
+ case AtomicExpr::AO__scoped_atomic_exchange_n:
LibCallName = "__atomic_exchange";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
MemTy, E->getExprLoc(), TInfo.Width);
break;
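At the source level, the scoped variants mirror their __atomic_* counterparts with one extra trailing scope argument; a hedged usage example (assuming the __MEMORY_SCOPE_DEVICE macro that accompanies these builtins):

    int counter = 0;
    // Same shape as __atomic_exchange_n, plus the synchronization scope.
    int old = __scoped_atomic_exchange_n(&counter, 1, __ATOMIC_SEQ_CST,
                                         __MEMORY_SCOPE_DEVICE);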
// void __atomic_store(size_t size, void *mem, void *val, int order)
// void __atomic_store_N(T *mem, T val, int order)
- case AtomicExpr::AO__c11_atomic_store:
- case AtomicExpr::AO__opencl_atomic_store:
- case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
+ case AtomicExpr::AO__c11_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
+ case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__scoped_atomic_store:
+ case AtomicExpr::AO__scoped_atomic_store_n:
LibCallName = "__atomic_store";
RetTy = getContext().VoidTy;
HaveRetTy = true;
@@ -1171,22 +1253,26 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
break;
// void __atomic_load(size_t size, void *mem, void *return, int order)
// T __atomic_load_N(T *mem, int order)
- case AtomicExpr::AO__c11_atomic_load:
- case AtomicExpr::AO__opencl_atomic_load:
- case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load:
case AtomicExpr::AO__atomic_load_n:
+ case AtomicExpr::AO__c11_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
+ case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__scoped_atomic_load:
+ case AtomicExpr::AO__scoped_atomic_load_n:
LibCallName = "__atomic_load";
break;
// T __atomic_add_fetch_N(T *mem, T val, int order)
// T __atomic_fetch_add_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_add_fetch:
+ case AtomicExpr::AO__scoped_atomic_add_fetch:
PostOp = llvm::Instruction::Add;
[[fallthrough]];
- case AtomicExpr::AO__c11_atomic_fetch_add:
- case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
+ case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__hip_atomic_fetch_add:
+ case AtomicExpr::AO__opencl_atomic_fetch_add:
+ case AtomicExpr::AO__scoped_atomic_fetch_add:
LibCallName = "__atomic_fetch_add";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
LoweredMemTy, E->getExprLoc(), TInfo.Width);
@@ -1194,12 +1280,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// T __atomic_and_fetch_N(T *mem, T val, int order)
// T __atomic_fetch_and_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_and_fetch:
+ case AtomicExpr::AO__scoped_atomic_and_fetch:
PostOp = llvm::Instruction::And;
[[fallthrough]];
+ case AtomicExpr::AO__atomic_fetch_and:
case AtomicExpr::AO__c11_atomic_fetch_and:
- case AtomicExpr::AO__opencl_atomic_fetch_and:
case AtomicExpr::AO__hip_atomic_fetch_and:
- case AtomicExpr::AO__atomic_fetch_and:
+ case AtomicExpr::AO__opencl_atomic_fetch_and:
+ case AtomicExpr::AO__scoped_atomic_fetch_and:
LibCallName = "__atomic_fetch_and";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
MemTy, E->getExprLoc(), TInfo.Width);
@@ -1207,12 +1295,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// T __atomic_or_fetch_N(T *mem, T val, int order)
// T __atomic_fetch_or_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_or_fetch:
+ case AtomicExpr::AO__scoped_atomic_or_fetch:
PostOp = llvm::Instruction::Or;
[[fallthrough]];
+ case AtomicExpr::AO__atomic_fetch_or:
case AtomicExpr::AO__c11_atomic_fetch_or:
- case AtomicExpr::AO__opencl_atomic_fetch_or:
case AtomicExpr::AO__hip_atomic_fetch_or:
- case AtomicExpr::AO__atomic_fetch_or:
+ case AtomicExpr::AO__opencl_atomic_fetch_or:
+ case AtomicExpr::AO__scoped_atomic_fetch_or:
LibCallName = "__atomic_fetch_or";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
MemTy, E->getExprLoc(), TInfo.Width);
@@ -1220,12 +1310,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// T __atomic_sub_fetch_N(T *mem, T val, int order)
// T __atomic_fetch_sub_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_sub_fetch:
+ case AtomicExpr::AO__scoped_atomic_sub_fetch:
PostOp = llvm::Instruction::Sub;
[[fallthrough]];
+ case AtomicExpr::AO__atomic_fetch_sub:
case AtomicExpr::AO__c11_atomic_fetch_sub:
- case AtomicExpr::AO__opencl_atomic_fetch_sub:
case AtomicExpr::AO__hip_atomic_fetch_sub:
- case AtomicExpr::AO__atomic_fetch_sub:
+ case AtomicExpr::AO__opencl_atomic_fetch_sub:
+ case AtomicExpr::AO__scoped_atomic_fetch_sub:
LibCallName = "__atomic_fetch_sub";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
LoweredMemTy, E->getExprLoc(), TInfo.Width);
@@ -1233,21 +1325,25 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// T __atomic_xor_fetch_N(T *mem, T val, int order)
// T __atomic_fetch_xor_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_xor_fetch:
+ case AtomicExpr::AO__scoped_atomic_xor_fetch:
PostOp = llvm::Instruction::Xor;
[[fallthrough]];
+ case AtomicExpr::AO__atomic_fetch_xor:
case AtomicExpr::AO__c11_atomic_fetch_xor:
- case AtomicExpr::AO__opencl_atomic_fetch_xor:
case AtomicExpr::AO__hip_atomic_fetch_xor:
- case AtomicExpr::AO__atomic_fetch_xor:
+ case AtomicExpr::AO__opencl_atomic_fetch_xor:
+ case AtomicExpr::AO__scoped_atomic_fetch_xor:
LibCallName = "__atomic_fetch_xor";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
MemTy, E->getExprLoc(), TInfo.Width);
break;
case AtomicExpr::AO__atomic_min_fetch:
+ case AtomicExpr::AO__scoped_atomic_min_fetch:
PostOpMinMax = true;
[[fallthrough]];
- case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__atomic_fetch_min:
+ case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__hip_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_min:
+ case AtomicExpr::AO__scoped_atomic_fetch_min:
LibCallName = E->getValueType()->isSignedIntegerType()
@@ -1257,12 +1353,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
LoweredMemTy, E->getExprLoc(), TInfo.Width);
break;
case AtomicExpr::AO__atomic_max_fetch:
+ case AtomicExpr::AO__scoped_atomic_max_fetch:
PostOpMinMax = true;
[[fallthrough]];
- case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_max:
+ case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__hip_atomic_fetch_max:
case AtomicExpr::AO__opencl_atomic_fetch_max:
+ case AtomicExpr::AO__scoped_atomic_fetch_max:
LibCallName = E->getValueType()->isSignedIntegerType()
? "__atomic_fetch_max"
: "__atomic_fetch_umax";
@@ -1272,10 +1370,12 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// T __atomic_nand_fetch_N(T *mem, T val, int order)
// T __atomic_fetch_nand_N(T *mem, T val, int order)
case AtomicExpr::AO__atomic_nand_fetch:
+ case AtomicExpr::AO__scoped_atomic_nand_fetch:
PostOp = llvm::Instruction::And; // the NOT is special cased below
[[fallthrough]];
- case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__atomic_fetch_nand:
+ case AtomicExpr::AO__c11_atomic_fetch_nand:
+ case AtomicExpr::AO__scoped_atomic_fetch_nand:
LibCallName = "__atomic_fetch_nand";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
MemTy, E->getExprLoc(), TInfo.Width);
@@ -1332,7 +1432,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1);
}
- if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
+ if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_nand_fetch)
ResVal = Builder.CreateNot(ResVal);
Builder.CreateStore(ResVal, Dest.withElementType(ResVal->getType()));
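The extra CreateNot exists because the __atomic_fetch_nand libcall returns the old value; the And (PostOp) and the Not together produce the *_nand_fetch result. A minimal model of the value handed back:

    #include <cstdint>
    // What __atomic_nand_fetch / __scoped_atomic_nand_fetch return, given the
    // prior memory contents and the operand.
    static uint64_t nandFetchResult(uint64_t Old, uint64_t Val) {
      return ~(Old & Val);
    }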
@@ -1349,12 +1450,16 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
E->getOp() == AtomicExpr::AO__opencl_atomic_store ||
E->getOp() == AtomicExpr::AO__hip_atomic_store ||
E->getOp() == AtomicExpr::AO__atomic_store ||
- E->getOp() == AtomicExpr::AO__atomic_store_n;
+ E->getOp() == AtomicExpr::AO__atomic_store_n ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_store ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_store_n;
bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
E->getOp() == AtomicExpr::AO__opencl_atomic_load ||
E->getOp() == AtomicExpr::AO__hip_atomic_load ||
E->getOp() == AtomicExpr::AO__atomic_load ||
- E->getOp() == AtomicExpr::AO__atomic_load_n;
+ E->getOp() == AtomicExpr::AO__atomic_load_n ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_load ||
+ E->getOp() == AtomicExpr::AO__scoped_atomic_load_n;
if (isa<llvm::ConstantInt>(Order)) {
auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
@@ -1741,8 +1846,7 @@ std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp(
llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak) {
// Do the atomic store.
Address Addr = getAtomicAddressAsAtomicIntPointer();
- auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr.getPointer(),
- ExpectedVal, DesiredVal,
+ auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr, ExpectedVal, DesiredVal,
Success, Failure);
// Other decoration.
Inst->setVolatile(LVal.isVolatileQualified());
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index cfbe3272196e..0cbace7b7f7b 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -66,28 +66,6 @@ static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM,
namespace {
-/// Represents a captured entity that requires extra operations in order for
-/// this entity to be copied or destroyed correctly.
-struct BlockCaptureManagedEntity {
- BlockCaptureEntityKind CopyKind, DisposeKind;
- BlockFieldFlags CopyFlags, DisposeFlags;
- const BlockDecl::Capture *CI;
- const CGBlockInfo::Capture *Capture;
-
- BlockCaptureManagedEntity(BlockCaptureEntityKind CopyType,
- BlockCaptureEntityKind DisposeType,
- BlockFieldFlags CopyFlags,
- BlockFieldFlags DisposeFlags,
- const BlockDecl::Capture &CI,
- const CGBlockInfo::Capture &Capture)
- : CopyKind(CopyType), DisposeKind(DisposeType), CopyFlags(CopyFlags),
- DisposeFlags(DisposeFlags), CI(&CI), Capture(&Capture) {}
-
- bool operator<(const BlockCaptureManagedEntity &Other) const {
- return Capture->getOffset() < Other.Capture->getOffset();
- }
-};
-
enum class CaptureStrKind {
// String for the copy helper.
CopyHelper,
@@ -174,9 +152,8 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
cast<llvm::IntegerType>(CGM.getTypes().ConvertType(C.UnsignedLongTy));
llvm::PointerType *i8p = nullptr;
if (CGM.getLangOpts().OpenCL)
- i8p =
- llvm::Type::getInt8PtrTy(
- CGM.getLLVMContext(), C.getTargetAddressSpace(LangAS::opencl_constant));
+ i8p = llvm::PointerType::get(
+ CGM.getLLVMContext(), C.getTargetAddressSpace(LangAS::opencl_constant));
else
i8p = CGM.VoidPtrTy;
@@ -187,8 +164,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
CGM.getLangOpts().getGC() == LangOptions::NonGC) {
descName = getBlockDescriptorName(blockInfo, CGM);
if (llvm::GlobalValue *desc = CGM.getModule().getNamedValue(descName))
- return llvm::ConstantExpr::getBitCast(desc,
- CGM.getBlockDescriptorType());
+ return desc;
}
// If there isn't an equivalent block descriptor global variable, create a new
@@ -226,8 +202,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
// Signature. Mandatory ObjC-style method descriptor @encode sequence.
std::string typeAtEncoding =
CGM.getContext().getObjCEncodingForBlock(blockInfo.getBlockExpr());
- elements.add(llvm::ConstantExpr::getBitCast(
- CGM.GetAddrOfConstantCString(typeAtEncoding).getPointer(), i8p));
+ elements.add(CGM.GetAddrOfConstantCString(typeAtEncoding).getPointer());
// GC layout.
if (C.getLangOpts().ObjC) {
@@ -266,7 +241,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
global->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
}
- return llvm::ConstantExpr::getBitCast(global, CGM.getBlockDescriptorType());
+ return global;
}
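This and the surrounding hunks delete ConstantExpr::getBitCast/CreateBitCast calls; with opaque pointers, every pointer in a given address space has the single type `ptr`, so those casts were identities. A small sketch of the invariant, assuming an LLVMContext with opaque pointers (the default in this release):

    #include <cassert>
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    void opaquePointersAreUniform(llvm::LLVMContext &Ctx) {
      // Pointer types are uniqued by address space alone, so the old
      // i8* <-> %struct.Desc* bitcasts no longer have anything to convert.
      llvm::PointerType *A = llvm::PointerType::get(Ctx, /*AddressSpace=*/0);
      llvm::PointerType *B = llvm::PointerType::getUnqual(Ctx);
      assert(A == B);
    }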
/*
@@ -832,7 +807,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
llvm::Constant *blockISA = blockInfo.NoEscape
? CGM.getNSConcreteGlobalBlock()
: CGM.getNSConcreteStackBlock();
- isa = llvm::ConstantExpr::getBitCast(blockISA, VoidPtrTy);
+ isa = blockISA;
// Build the block descriptor.
descriptor = buildBlockDescriptor(CGM, blockInfo);
@@ -964,7 +939,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
if (CI.isNested())
byrefPointer = Builder.CreateLoad(src, "byref.capture");
else
- byrefPointer = Builder.CreateBitCast(src.getPointer(), VoidPtrTy);
+ byrefPointer = src.getPointer();
// Write that void* into the capture field.
Builder.CreateStore(byrefPointer, blockField);
@@ -1017,7 +992,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// Fake up a new variable so that EmitScalarInit doesn't think
// we're referring to the variable in its own initializer.
ImplicitParamDecl BlockFieldPseudoVar(getContext(), type,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
// We use one of these or the other depending on whether the
// reference is nested.
@@ -1212,8 +1187,8 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
}
} else {
// Bitcast the block literal to a generic block literal.
- BlockPtr = Builder.CreatePointerCast(
- BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal");
+ BlockPtr =
+ Builder.CreatePointerCast(BlockPtr, UnqualPtrTy, "block.literal");
// Get pointer to the block invoke function
llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3);
@@ -1231,12 +1206,6 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
const CGFunctionInfo &FnInfo =
CGM.getTypes().arrangeBlockFunctionCall(Args, FuncTy);
- // Cast the function pointer to the right type.
- llvm::Type *BlockFTy = CGM.getTypes().GetFunctionType(FnInfo);
-
- llvm::Type *BlockFTyPtr = llvm::PointerType::getUnqual(BlockFTy);
- Func = Builder.CreatePointerCast(Func, BlockFTyPtr);
-
// Prepare the callee.
CGCallee Callee(CGCalleeInfo(), Func);
@@ -1481,7 +1450,7 @@ llvm::Function *CodeGenFunction::GenerateBlockFunction(
ImplicitParamDecl SelfDecl(getContext(), const_cast<BlockDecl *>(blockDecl),
SourceLocation(), II, selfTy,
- ImplicitParamDecl::ObjCSelf);
+ ImplicitParamKind::ObjCSelf);
args.push_back(&SelfDecl);
// Now add the rest of the parameters.
@@ -1689,7 +1658,6 @@ struct CallBlockRelease final : EHScopeStack::Cleanup {
llvm::Value *BlockVarAddr;
if (LoadBlockVarAddr) {
BlockVarAddr = CGF.Builder.CreateLoad(Addr);
- BlockVarAddr = CGF.Builder.CreateBitCast(BlockVarAddr, CGF.VoidPtrTy);
} else {
BlockVarAddr = Addr.getPointer();
}
@@ -1740,7 +1708,7 @@ static std::string getBlockCaptureStr(const CGBlockInfo::Capture &Cap,
Str += "c";
SmallString<256> TyStr;
llvm::raw_svector_ostream Out(TyStr);
- CGM.getCXXABI().getMangleContext().mangleTypeName(CaptureTy, Out);
+ CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(CaptureTy, Out);
Str += llvm::to_string(TyStr.size()) + TyStr.c_str();
break;
}
@@ -1899,16 +1867,16 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
CaptureStrKind::CopyHelper, CGM);
if (llvm::GlobalValue *Func = CGM.getModule().getNamedValue(FuncName))
- return llvm::ConstantExpr::getBitCast(Func, VoidPtrTy);
+ return Func;
ASTContext &C = getContext();
QualType ReturnTy = C.VoidTy;
FunctionArgList args;
- ImplicitParamDecl DstDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl DstDecl(C, C.VoidPtrTy, ImplicitParamKind::Other);
args.push_back(&DstDecl);
- ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamKind::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
@@ -1997,9 +1965,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
}
case BlockCaptureEntityKind::BlockObject: {
llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src");
- srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy);
- llvm::Value *dstAddr =
- Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy);
+ llvm::Value *dstAddr = dstField.getPointer();
llvm::Value *args[] = {
dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask())
};
@@ -2022,7 +1988,7 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
FinishFunction();
- return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
+ return Fn;
}
static BlockFieldFlags
@@ -2088,14 +2054,14 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
CaptureStrKind::DisposeHelper, CGM);
if (llvm::GlobalValue *Func = CGM.getModule().getNamedValue(FuncName))
- return llvm::ConstantExpr::getBitCast(Func, VoidPtrTy);
+ return Func;
ASTContext &C = getContext();
QualType ReturnTy = C.VoidTy;
FunctionArgList args;
- ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamKind::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
@@ -2145,7 +2111,7 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
FinishFunction();
- return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
+ return Fn;
}
namespace {
@@ -2337,10 +2303,10 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
QualType ReturnTy = Context.VoidTy;
FunctionArgList args;
- ImplicitParamDecl Dst(Context, Context.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl Dst(Context, Context.VoidPtrTy, ImplicitParamKind::Other);
args.push_back(&Dst);
- ImplicitParamDecl Src(Context, Context.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl Src(Context, Context.VoidPtrTy, ImplicitParamKind::Other);
args.push_back(&Src);
const CGFunctionInfo &FI =
@@ -2384,7 +2350,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
CGF.FinishFunction();
- return llvm::ConstantExpr::getBitCast(Fn, CGF.Int8PtrTy);
+ return Fn;
}
/// Build the copy helper for a __block variable.
@@ -2405,7 +2371,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
FunctionArgList args;
ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
args.push_back(&Src);
const CGFunctionInfo &FI =
@@ -2440,7 +2406,7 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
CGF.FinishFunction();
- return llvm::ConstantExpr::getBitCast(Fn, CGF.Int8PtrTy);
+ return Fn;
}
/// Build the dispose helper for a __block variable.
@@ -2615,11 +2581,11 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) {
SmallVector<llvm::Type *, 8> types;
// void *__isa;
- types.push_back(Int8PtrTy);
+ types.push_back(VoidPtrTy);
size += getPointerSize();
// void *__forwarding;
- types.push_back(llvm::PointerType::getUnqual(byrefType));
+ types.push_back(VoidPtrTy);
size += getPointerSize();
// int32_t __flags;
@@ -2634,11 +2600,11 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) {
bool hasCopyAndDispose = getContext().BlockRequiresCopying(Ty, D);
if (hasCopyAndDispose) {
/// void *__copy_helper;
- types.push_back(Int8PtrTy);
+ types.push_back(VoidPtrTy);
size += getPointerSize();
/// void *__destroy_helper;
- types.push_back(Int8PtrTy);
+ types.push_back(VoidPtrTy);
size += getPointerSize();
}
@@ -2647,7 +2613,7 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) {
if (getContext().getByrefLifetime(Ty, Lifetime, HasByrefExtendedLayout) &&
HasByrefExtendedLayout) {
/// void *__byref_variable_layout;
- types.push_back(Int8PtrTy);
+ types.push_back(VoidPtrTy);
size += CharUnits::fromQuantity(PointerSizeInBytes);
}
@@ -2796,10 +2762,8 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) {
void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags,
bool CanThrow) {
llvm::FunctionCallee F = CGM.getBlockObjectDispose();
- llvm::Value *args[] = {
- Builder.CreateBitCast(V, Int8PtrTy),
- llvm::ConstantInt::get(Int32Ty, flags.getBitMask())
- };
+ llvm::Value *args[] = {V,
+ llvm::ConstantInt::get(Int32Ty, flags.getBitMask())};
if (CanThrow)
EmitRuntimeCallOrInvoke(F, args);
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index 68535920088c..bf5ab171d720 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -126,25 +126,22 @@ public:
return CreateAlignedStore(getInt1(Value), Addr, CharUnits::One());
}
- // Temporarily use old signature; clang will be updated to an Address overload
- // in a subsequent patch.
llvm::AtomicCmpXchgInst *
- CreateAtomicCmpXchg(llvm::Value *Ptr, llvm::Value *Cmp, llvm::Value *New,
+ CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New,
llvm::AtomicOrdering SuccessOrdering,
llvm::AtomicOrdering FailureOrdering,
llvm::SyncScope::ID SSID = llvm::SyncScope::System) {
return CGBuilderBaseTy::CreateAtomicCmpXchg(
- Ptr, Cmp, New, llvm::MaybeAlign(), SuccessOrdering, FailureOrdering,
- SSID);
+ Addr.getPointer(), Cmp, New, Addr.getAlignment().getAsAlign(),
+ SuccessOrdering, FailureOrdering, SSID);
}
- // Temporarily use old signature; clang will be updated to an Address overload
- // in a subsequent patch.
llvm::AtomicRMWInst *
- CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, llvm::Value *Ptr,
- llvm::Value *Val, llvm::AtomicOrdering Ordering,
+ CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val,
+ llvm::AtomicOrdering Ordering,
llvm::SyncScope::ID SSID = llvm::SyncScope::System) {
- return CGBuilderBaseTy::CreateAtomicRMW(Op, Ptr, Val, llvm::MaybeAlign(),
+ return CGBuilderBaseTy::CreateAtomicRMW(Op, Addr.getPointer(), Val,
+ Addr.getAlignment().getAsAlign(),
Ordering, SSID);
}
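For callers, the practical effect of the Address overloads is that the alignment recorded on the Address lands on the emitted instruction instead of the MaybeAlign() default (which was the type's store size). A sketch with CGF, Addr, and Val assumed in scope:

    // Emits: atomicrmw add ptr %p, <ty> %v seq_cst, align <Addr's alignment>
    llvm::AtomicRMWInst *RMW = CGF.Builder.CreateAtomicRMW(
        llvm::AtomicRMWInst::Add, Addr, Val,
        llvm::AtomicOrdering::SequentiallyConsistent);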
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 30f5f4e7061c..83d0a72aac54 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -25,8 +25,10 @@
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OSLog.h"
+#include "clang/AST/OperationKinds.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/APFloat.h"
@@ -43,7 +45,6 @@
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/IntrinsicsHexagon.h"
-#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/IntrinsicsR600.h"
@@ -55,6 +56,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MatrixBuilder.h"
#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
#include "llvm/TargetParser/X86TargetParser.h"
@@ -145,13 +147,12 @@ llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
// PPC, after backend supports IEEE 128-bit style libcalls.
if (getTriple().isPPC64() &&
&getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
- F128Builtins.find(BuiltinID) != F128Builtins.end())
+ F128Builtins.contains(BuiltinID))
Name = F128Builtins[BuiltinID];
else if (getTriple().isOSAIX() &&
&getTarget().getLongDoubleFormat() ==
&llvm::APFloat::IEEEdouble() &&
- AIXLongDouble64Builtins.find(BuiltinID) !=
- AIXLongDouble64Builtins.end())
+ AIXLongDouble64Builtins.contains(BuiltinID))
Name = AIXLongDouble64Builtins[BuiltinID];
else
Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
@@ -187,8 +188,7 @@ static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
return V;
}
-static llvm::Value *CheckAtomicAlignment(CodeGenFunction &CGF,
- const CallExpr *E) {
+static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
ASTContext &Ctx = CGF.getContext();
Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
unsigned Bytes = Ptr.getElementType()->isPointerTy()
@@ -198,8 +198,10 @@ static llvm::Value *CheckAtomicAlignment(CodeGenFunction &CGF,
if (Align % Bytes != 0) {
DiagnosticsEngine &Diags = CGF.CGM.getDiags();
Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
+ // Force address to be at least naturally-aligned.
+ return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
}
- return Ptr.getPointer();
+ return Ptr;
}
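A user-level illustration of what this helper now guards; the packed struct is just a convenient way to manufacture an under-aligned int:

    struct __attribute__((packed)) S { char c; int i; };

    int bump(struct S *s) {
      // &s->i is only 1-byte aligned: clang emits warn_sync_op_misaligned and,
      // with this change, proceeds as if the operand had int's natural
      // 4-byte alignment rather than carrying the unusable alignment forward.
      return __sync_fetch_and_add(&s->i, 1);
    }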
/// Utility to insert an atomic instruction based on Intrinsic::ID
@@ -214,23 +216,17 @@ static Value *MakeBinaryAtomicValue(
E->getArg(0)->getType()->getPointeeType()));
assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
- llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
- unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
+ Address DestAddr = CheckAtomicAlignment(CGF, E);
- llvm::IntegerType *IntType =
- llvm::IntegerType::get(CGF.getLLVMContext(),
- CGF.getContext().getTypeSize(T));
- llvm::Type *IntPtrType =
- llvm::PointerType::get(CGF.getLLVMContext(), AddrSpace);
+ llvm::IntegerType *IntType = llvm::IntegerType::get(
+ CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
- llvm::Value *Args[2];
- Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
- Args[1] = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Type *ValueType = Args[1]->getType();
- Args[1] = EmitToInt(CGF, Args[1], T, IntType);
+ llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Type *ValueType = Val->getType();
+ Val = EmitToInt(CGF, Val, T, IntType);
- llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
- Kind, Args[0], Args[1], Ordering);
+ llvm::Value *Result =
+ CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
return EmitFromInt(CGF, Result, T, ValueType);
}
@@ -238,12 +234,8 @@ static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
Value *Val = CGF.EmitScalarExpr(E->getArg(0));
Value *Address = CGF.EmitScalarExpr(E->getArg(1));
- // Convert the type of the pointer to a pointer to the stored type.
Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
- unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace();
- Value *BC = CGF.Builder.CreateBitCast(
- Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast");
- LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
+ LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getArg(0)->getType());
LV.setNontemporal(true);
CGF.EmitStoreOfScalar(Val, LV, false);
return nullptr;
@@ -277,20 +269,18 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
E->getArg(0)->getType()->getPointeeType()));
assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
- llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
+ Address DestAddr = CheckAtomicAlignment(CGF, E);
llvm::IntegerType *IntType = llvm::IntegerType::get(
CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
- llvm::Value *Args[2];
- Args[1] = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Type *ValueType = Args[1]->getType();
- Args[1] = EmitToInt(CGF, Args[1], T, IntType);
- Args[0] = DestPtr;
+ llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Type *ValueType = Val->getType();
+ Val = EmitToInt(CGF, Val, T, IntType);
llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
- Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
- Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
+ Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
+ Result = CGF.Builder.CreateBinOp(Op, Result, Val);
if (Invert)
Result =
CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
@@ -316,20 +306,18 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
bool ReturnBool) {
QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
- llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
+ Address DestAddr = CheckAtomicAlignment(CGF, E);
llvm::IntegerType *IntType = llvm::IntegerType::get(
CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
- Value *Args[3];
- Args[0] = DestPtr;
- Args[1] = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Type *ValueType = Args[1]->getType();
- Args[1] = EmitToInt(CGF, Args[1], T, IntType);
- Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
+ Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Type *ValueType = Cmp->getType();
+ Cmp = EmitToInt(CGF, Cmp, T, IntType);
+ Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
- Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
+ DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
llvm::AtomicOrdering::SequentiallyConsistent);
if (ReturnBool)
// Extract boolean success flag and zext it to int.
@@ -365,7 +353,8 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
E->getArg(2)->getType()));
- auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
+ Address DestAddr = CheckAtomicAlignment(CGF, E);
+
auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
@@ -379,8 +368,7 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
// _Interlocked* operations in the future, we will have to remove the volatile
// marker.
auto *Result = CGF.Builder.CreateAtomicCmpXchg(
- Destination, Comparand, Exchange,
- SuccessOrdering, FailureOrdering);
+ DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
Result->setVolatile(true);
return CGF.Builder.CreateExtractValue(Result, 0);
}
@@ -393,29 +381,34 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
// __int64 _ExchangeHigh,
// __int64 _ExchangeLow,
// __int64 * _ComparandResult);
+//
+// Note that Destination is assumed to be at least 16-byte aligned, despite
+// being typed as __int64 *.
+
static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
const CallExpr *E,
AtomicOrdering SuccessOrdering) {
assert(E->getNumArgs() == 4);
- llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
- llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));
+ Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
- assert(Destination->getType()->isPointerTy());
+ assert(DestPtr->getType()->isPointerTy());
assert(!ExchangeHigh->getType()->isPointerTy());
assert(!ExchangeLow->getType()->isPointerTy());
- assert(ComparandPtr->getType()->isPointerTy());
// For Release ordering, the failure ordering should be Monotonic.
auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
? AtomicOrdering::Monotonic
: SuccessOrdering;
- // Convert to i128 pointers and values.
+ // Convert to i128 pointers and values. Alignment is also overridden for
+ // destination pointer.
llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
- Address ComparandResult(ComparandPtr, Int128Ty,
- CGF.getContext().toCharUnitsFromBits(128));
+ Address DestAddr(DestPtr, Int128Ty,
+ CGF.getContext().toCharUnitsFromBits(128));
+ ComparandAddr = ComparandAddr.withElementType(Int128Ty);
// (((i128)hi) << 64) | ((i128)lo)
ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
@@ -425,9 +418,9 @@ static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
// Load the comparand for the instruction.
- llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);
+ llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
- auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+ auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
SuccessOrdering, FailureOrdering);
// The atomic instruction is marked volatile for consistency with MSVC. This
@@ -438,7 +431,7 @@ static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
// Store the result as an outparameter.
CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
- ComparandResult);
+ ComparandAddr);
// Get the success boolean and zero extend it to i8.
Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
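The high/low packing above, restated in plain C++ for clarity (unsigned __int128 is a clang/GCC extension; the function name is illustrative):

    // (((i128)hi) << 64) | ((i128)lo), as built via CreateZExt/CreateShl/CreateOr.
    unsigned __int128 packExchange(unsigned long long Hi, unsigned long long Lo) {
      return ((unsigned __int128)Hi << 64) | Lo;
    }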
@@ -450,24 +443,21 @@ static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
assert(E->getArg(0)->getType()->isPointerType());
auto *IntTy = CGF.ConvertType(E->getType());
+ Address DestAddr = CheckAtomicAlignment(CGF, E);
auto *Result = CGF.Builder.CreateAtomicRMW(
- AtomicRMWInst::Add,
- CGF.EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(IntTy, 1),
- Ordering);
+ AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
}
-static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
+static Value *EmitAtomicDecrementValue(
+ CodeGenFunction &CGF, const CallExpr *E,
AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
assert(E->getArg(0)->getType()->isPointerType());
auto *IntTy = CGF.ConvertType(E->getType());
+ Address DestAddr = CheckAtomicAlignment(CGF, E);
auto *Result = CGF.Builder.CreateAtomicRMW(
- AtomicRMWInst::Sub,
- CGF.EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(IntTy, 1),
- Ordering);
+ AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
}
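Both helpers implement the MSVC contract that _InterlockedIncrement/_InterlockedDecrement return the new value, while atomicrmw yields the old one, hence the trailing add/sub of 1. A model of the increment path using portable builtins as a stand-in for the emitted IR:

    long interlockedIncrementModel(long *P) {
      long Old = __atomic_fetch_add(P, 1, __ATOMIC_SEQ_CST); // atomicrmw add
      return Old + 1; // the CreateAdd above recovers the post-increment value
    }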
@@ -503,8 +493,8 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
unsigned ConstrainedIntrinsicID) {
llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
if (CGF.Builder.getIsFPConstrained()) {
- CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
} else {
@@ -800,11 +790,6 @@ EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
}
Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
- llvm::Type *DestType = Int8PtrTy;
- if (ArgValue->getType() != DestType)
- ArgValue =
- Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
-
Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
}
@@ -834,6 +819,165 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}
+llvm::Value *
+CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
+ llvm::IntegerType *ResType) {
+ // The code generated here calculates the size of a struct with a flexible
+  // array member that uses the counted_by attribute. There are three instances
+  // we handle:
+ //
+ // struct s {
+ // unsigned long flags;
+ // int count;
+ // int array[] __attribute__((counted_by(count)));
+ // }
+ //
+ // 1) bdos of the flexible array itself:
+ //
+ // __builtin_dynamic_object_size(p->array, 1) ==
+ // p->count * sizeof(*p->array)
+ //
+ // 2) bdos of a pointer into the flexible array:
+ //
+ // __builtin_dynamic_object_size(&p->array[42], 1) ==
+ // (p->count - 42) * sizeof(*p->array)
+ //
+  // 3) bdos of the whole struct, including the flexible array:
+ //
+ // __builtin_dynamic_object_size(p, 1) ==
+ // max(sizeof(struct s),
+ // offsetof(struct s, array) + p->count * sizeof(*p->array))
+ //
+ ASTContext &Ctx = getContext();
+ const Expr *Base = E->IgnoreParenImpCasts();
+ const Expr *Idx = nullptr;
+
+ if (const auto *UO = dyn_cast<UnaryOperator>(Base);
+ UO && UO->getOpcode() == UO_AddrOf) {
+ Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
+ if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
+ Base = ASE->getBase()->IgnoreParenImpCasts();
+ Idx = ASE->getIdx()->IgnoreParenImpCasts();
+
+ if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
+ int64_t Val = IL->getValue().getSExtValue();
+ if (Val < 0)
+ // __bdos returns 0 for negative indexes into an array in a struct.
+ return getDefaultBuiltinObjectSizeResult(Type, ResType);
+
+ if (Val == 0)
+ // The index is 0, so we don't need to take it into account.
+ Idx = nullptr;
+ }
+ } else {
+ // Potential pointer to another element in the struct.
+ Base = SubExpr;
+ }
+ }
+
+ // Get the flexible array member Decl.
+ const ValueDecl *FAMDecl = nullptr;
+ if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
+ // Check if \p Base is referencing the FAM itself.
+ if (const ValueDecl *MD = ME->getMemberDecl()) {
+ const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
+ getLangOpts().getStrictFlexArraysLevel();
+ if (!Decl::isFlexibleArrayMemberLike(
+ Ctx, MD, MD->getType(), StrictFlexArraysLevel,
+ /*IgnoreTemplateOrMacroSubstitution=*/true))
+ return nullptr;
+
+ FAMDecl = MD;
+ }
+ } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
+ // Check if we're pointing to the whole struct.
+ QualType Ty = DRE->getDecl()->getType();
+ if (Ty->isPointerType())
+ Ty = Ty->getPointeeType();
+
+ if (const auto *RD = Ty->getAsRecordDecl())
+ // Don't use the outer lexical record because the FAM might be in a
+ // different RecordDecl.
+ FAMDecl = FindFlexibleArrayMemberField(Ctx, RD);
+ }
+
+ if (!FAMDecl || !FAMDecl->hasAttr<CountedByAttr>())
+ // No flexible array member found or it doesn't have the "counted_by"
+ // attribute.
+ return nullptr;
+
+ const ValueDecl *CountedByFD = FindCountedByField(Base);
+ if (!CountedByFD)
+ // Can't find the field referenced by the "counted_by" attribute.
+ return nullptr;
+
+ // Build a load of the counted_by field.
+ bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
+ const Expr *CountedByExpr = BuildCountedByFieldExpr(Base, CountedByFD);
+ Value *CountedByInst = EmitAnyExprToTemp(CountedByExpr).getScalarVal();
+ llvm::Type *CountedByTy = CountedByInst->getType();
+
+ // Build a load of the index and subtract it from the count.
+ Value *IdxInst = nullptr;
+ if (Idx) {
+ bool IdxSigned = Idx->getType()->isSignedIntegerType();
+ IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
+ IdxInst = IdxSigned ? Builder.CreateSExtOrTrunc(IdxInst, CountedByTy)
+ : Builder.CreateZExtOrTrunc(IdxInst, CountedByTy);
+
+ // We go ahead with the calculation here. If the index turns out to be
+ // negative, we'll catch it at the end.
+ CountedByInst =
+ Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
+ }
+
+ // Calculate how large the flexible array member is in bytes.
+ const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
+ CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
+ llvm::Constant *ElemSize =
+ llvm::ConstantInt::get(CountedByTy, Size.getQuantity(), IsSigned);
+ Value *FAMSize =
+ Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
+ FAMSize = IsSigned ? Builder.CreateSExtOrTrunc(FAMSize, ResType)
+ : Builder.CreateZExtOrTrunc(FAMSize, ResType);
+ Value *Res = FAMSize;
+
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
+    // The whole struct is specified in the __bdos.
+ const RecordDecl *OuterRD =
+ CountedByFD->getDeclContext()->getOuterLexicalRecordContext();
+ const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);
+
+ // Get the offset of the FAM.
+ CharUnits Offset = Ctx.toCharUnitsFromBits(Ctx.getFieldOffset(FAMDecl));
+ llvm::Constant *FAMOffset =
+ ConstantInt::get(ResType, Offset.getQuantity(), IsSigned);
+ Value *OffsetAndFAMSize =
+ Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);
+
+ // Get the full size of the struct.
+ llvm::Constant *SizeofStruct =
+ ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);
+
+ // max(sizeof(struct s),
+ // offsetof(struct s, array) + p->count * sizeof(*p->array))
+ Res = IsSigned
+ ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax,
+ OffsetAndFAMSize, SizeofStruct)
+ : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax,
+ OffsetAndFAMSize, SizeofStruct);
+ }
+
+ // A negative \p IdxInst or \p CountedByInst means that the index lands
+ // outside of the flexible array member. If that's the case, we want to
+ // return 0.
+ Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
+ if (IdxInst)
+ Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
+
+ return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
+}
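A usage-level sketch of the three cases enumerated in the comment at the top of this function, assuming p->count == 10:

    struct s {
      unsigned long flags;
      int count;
      int array[] __attribute__((counted_by(count)));
    };

    // __builtin_dynamic_object_size(p->array, 1)     -> 10 * sizeof(int)
    // __builtin_dynamic_object_size(&p->array[4], 1) -> (10 - 4) * sizeof(int)
    // __builtin_dynamic_object_size(p, 1)            -> max(sizeof(struct s),
    //     offsetof(struct s, array) + 10 * sizeof(int))
    unsigned long remaining(struct s *p, int idx) {
      // Yields 0 when idx is negative or exceeds p->count, per the select above.
      return __builtin_dynamic_object_size(&p->array[idx], 1);
    }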
+
/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
@@ -866,6 +1010,13 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
}
}
+ if (IsDynamic) {
+ // Emit special code for a flexible array member with the "counted_by"
+ // attribute.
+ if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
+ return V;
+ }
+
// LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
// evaluate E for side-effects. In either case, we shouldn't lower to
// @llvm.objectsize.
@@ -991,9 +1142,8 @@ static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
llvm::IntegerType *IntType = llvm::IntegerType::get(
CGF.getLLVMContext(),
CGF.getContext().getTypeSize(E->getArg(1)->getType()));
- llvm::Type *PtrType = llvm::PointerType::getUnqual(CGF.getLLVMContext());
llvm::FunctionType *FTy =
- llvm::FunctionType::get(CGF.Int8Ty, {PtrType, IntType}, false);
+ llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
llvm::InlineAsm *IA =
llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
@@ -1062,8 +1212,7 @@ static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
Mask = CGF.Builder.CreateNot(Mask);
RMWOp = llvm::AtomicRMWInst::And;
}
- OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
- Ordering);
+ OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
} else {
// Emit a plain load for the non-interlocked intrinsics.
OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
@@ -1132,7 +1281,7 @@ static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
Constraints += MachineClobbers;
}
- llvm::Type *PtrType = llvm::PointerType::getUnqual(CGF.getLLVMContext());
+ llvm::Type *PtrType = CGF.UnqualPtrTy;
llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
llvm::InlineAsm *IA =
@@ -1782,6 +1931,45 @@ Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
return ArgValue;
}
+static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
+ return CGF.Builder.CreateBinaryIntrinsic(
+ Intrinsic::abs, ArgValue,
+ ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
+}
+
+static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
+ bool SanitizeOverflow) {
+ Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
+
+ // Try to eliminate overflow check.
+ if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
+ if (!VCI->isMinSignedValue())
+ return EmitAbs(CGF, ArgValue, true);
+ }
+
+ CodeGenFunction::SanitizerScope SanScope(&CGF);
+
+ Constant *Zero = Constant::getNullValue(ArgValue->getType());
+ Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
+ Intrinsic::ssub_with_overflow, Zero, ArgValue);
+ Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
+ Value *NotOverflow = CGF.Builder.CreateNot(
+ CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
+
+ // TODO: support -ftrapv-handler.
+ if (SanitizeOverflow) {
+ CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
+ SanitizerHandler::NegateOverflow,
+ {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
+ CGF.EmitCheckTypeDescriptor(E->getType())},
+ {ArgValue});
+ } else
+ CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
+
+ Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
+ return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
+}
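In effect the helper computes abs as 0 - x through ssub.with.overflow so the one failing input, INT_MIN, is detected. Modeled with portable builtins (the trap stands in for the sanitizer handler):

    int checkedAbsModel(int X) {
      int Neg;
      if (__builtin_sub_overflow(0, X, &Neg)) // overflows only for INT_MIN
        __builtin_trap();                     // EmitCheck / EmitTrapCheck path
      return X < 0 ? Neg : X;                 // the final select on "abscond"
    }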
+
/// Get the argument type for arguments to os_log_helper.
static CanQualType getOSLogArgType(ASTContext &C, int Size) {
QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
@@ -1812,7 +2000,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
FunctionArgList Args;
Args.push_back(ImplicitParamDecl::Create(
Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
- ImplicitParamDecl::Other));
+ ImplicitParamKind::Other));
ArgTys.emplace_back(Ctx.VoidPtrTy);
for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
@@ -1824,7 +2012,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
Args.push_back(ImplicitParamDecl::Create(
Ctx, nullptr, SourceLocation(),
&Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
- ImplicitParamDecl::Other));
+ ImplicitParamKind::Other));
ArgTys.emplace_back(ArgTy);
}
@@ -2251,6 +2439,19 @@ static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
return nullptr;
}
+static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
+ const FunctionDecl *FD) {
+ auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
+ auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
+ auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
+
+ SmallVector<Value *, 16> Args;
+ for (auto &&FormalTy : FnTy->params())
+ Args.push_back(llvm::PoisonValue::get(FormalTy));
+
+ return RValue::get(CGF->Builder.CreateCall(UBF, Args));
+}
+
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
@@ -2283,6 +2484,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const unsigned BuiltinIDIfNoAsmLabel =
FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
+ std::optional<bool> ErrnoOverriden;
+  // ErrnoOverriden is true if math-errno is overridden via
+  // '#pragma float_control(precise, on)'. This pragma disables fast-math,
+  // which implies math-errno.
+ if (E->hasStoredFPFeatures()) {
+ FPOptionsOverride OP = E->getFPFeatures();
+ if (OP.hasMathErrnoOverride())
+ ErrnoOverriden = OP.getMathErrnoOverride();
+ }
+  // True if '__attribute__((optnone))' is used. This attribute overrides
+  // fast-math, which implies math-errno.
+ bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
+
+  // True if we are compiling with optimization and errno has been disabled
+  // via '#pragma float_control(precise, off)', and
+  // '__attribute__((optnone))' hasn't been seen.
+ bool ErrnoOverridenToFalseWithOpt =
+ ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
+ CGM.getCodeGenOpts().OptimizationLevel != 0;
+
// There are LLVM math intrinsics/instructions corresponding to math library
// functions except the LLVM op will never set errno while the math library
// might. Also, math builtins have the same semantics as their math library
@@ -2290,13 +2511,69 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// LLVM counterparts if the call is marked 'const' (known to never set errno).
// In case FP exceptions are enabled, the experimental versions of the
// intrinsics model those.
+ bool ConstAlways =
+ getContext().BuiltinInfo.isConst(BuiltinID);
+
+ // There's a special case with the fma builtins where they are always const
+  // if the target environment is GNU or the target OS is Windows and we're
+  // targeting the MSVCRT.dll environment.
+  // FIXME: This list can become outdated. Need to find a way to get it some
+ // other way.
+ switch (BuiltinID) {
+ case Builtin::BI__builtin_fma:
+ case Builtin::BI__builtin_fmaf:
+ case Builtin::BI__builtin_fmal:
+ case Builtin::BIfma:
+ case Builtin::BIfmaf:
+ case Builtin::BIfmal: {
+ auto &Trip = CGM.getTriple();
+ if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
+ ConstAlways = true;
+ break;
+ }
+ default:
+ break;
+ }
+
bool ConstWithoutErrnoAndExceptions =
getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
bool ConstWithoutExceptions =
getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
- if (FD->hasAttr<ConstAttr>() ||
- ((ConstWithoutErrnoAndExceptions || ConstWithoutExceptions) &&
- (!ConstWithoutErrnoAndExceptions || (!getLangOpts().MathErrno)))) {
+
+  // ConstAttr is set in fast-math mode, and fast-math mode implies that
+  // math-errno is disabled.
+  // Math intrinsics are generated only when math-errno is disabled. Any pragmas
+  // or attributes that affect math-errno should prevent or allow math
+  // intrinsics to be generated. Intrinsics are generated:
+  // 1- In fast-math mode, unless math-errno is overridden
+  //    via '#pragma float_control(precise, on)', or via
+  //    '__attribute__((optnone))'.
+  // 2- If math-errno was enabled on the command line but overridden
+  //    to false via '#pragma float_control(precise, off)' and
+  //    '__attribute__((optnone))' hasn't been used.
+  // 3- If we are compiling with optimization and errno has been disabled
+  //    via '#pragma float_control(precise, off)', and
+  //    '__attribute__((optnone))' hasn't been used.
+
+ bool ConstWithoutErrnoOrExceptions =
+ ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
+ bool GenerateIntrinsics =
+ (ConstAlways && !OptNone) ||
+ (!getLangOpts().MathErrno &&
+ !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
+ if (!GenerateIntrinsics) {
+ GenerateIntrinsics =
+ ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
+ if (!GenerateIntrinsics)
+ GenerateIntrinsics =
+ ConstWithoutErrnoOrExceptions &&
+ (!getLangOpts().MathErrno &&
+ !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
+ if (!GenerateIntrinsics)
+ GenerateIntrinsics =
+ ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
+ }
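A source-level illustration of rules 2 and 3, compiled with -fmath-errno and optimization enabled (a sketch; pragma placement follows the documented file-scope form):

    #include <math.h>

    #pragma float_control(precise, off)
    double fast_sqrt(double x) { return sqrt(x); }    // eligible for @llvm.sqrt

    #pragma float_control(precise, on)
    double precise_sqrt(double x) { return sqrt(x); } // stays a libcall (errno)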
+ if (GenerateIntrinsics) {
switch (BuiltinIDIfNoAsmLabel) {
case Builtin::BIceil:
case Builtin::BIceilf:
@@ -2355,7 +2632,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::exp2,
Intrinsic::experimental_constrained_exp2));
-
+ case Builtin::BI__builtin_exp10:
+ case Builtin::BI__builtin_exp10f:
+ case Builtin::BI__builtin_exp10f16:
+ case Builtin::BI__builtin_exp10l:
+ case Builtin::BI__builtin_exp10f128: {
+ // TODO: strictfp support
+ if (Builder.getIsFPConstrained())
+ break;
+ return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp10));
+ }
case Builtin::BIfabs:
case Builtin::BIfabsf:
case Builtin::BIfabsl:
@@ -2544,7 +2830,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_sqrtf:
case Builtin::BI__builtin_sqrtf16:
case Builtin::BI__builtin_sqrtl:
- case Builtin::BI__builtin_sqrtf128: {
+ case Builtin::BI__builtin_sqrtf128:
+ case Builtin::BI__builtin_elementwise_sqrt: {
llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
*this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
SetSqrtFPAccuracy(Call);
@@ -2619,6 +2906,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
}
+ // Check NonnullAttribute/NullabilityArg and Alignment.
+ auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
+ unsigned ParmNum) {
+ Value *Val = A.getPointer();
+ EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
+ ParmNum);
+
+ if (SanOpts.has(SanitizerKind::Alignment)) {
+ SanitizerSet SkippedChecks;
+ SkippedChecks.set(SanitizerKind::All);
+ SkippedChecks.clear(SanitizerKind::Alignment);
+ SourceLocation Loc = Arg->getExprLoc();
+ // Strip an implicit cast.
+ if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
+ if (CE->getCastKind() == CK_BitCast)
+ Arg = CE->getSubExpr();
+ EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
+ SkippedChecks);
+ }
+ };
+
switch (BuiltinIDIfNoAsmLabel) {
default: break;
case Builtin::BI__builtin___CFStringMakeConstantString:
@@ -2636,24 +2944,33 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_va_copy: {
Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
-
- llvm::Type *Type = Int8PtrTy;
-
- DstPtr = Builder.CreateBitCast(DstPtr, Type);
- SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr});
return RValue::get(nullptr);
}
+ case Builtin::BIabs:
+ case Builtin::BIlabs:
+ case Builtin::BIllabs:
case Builtin::BI__builtin_abs:
case Builtin::BI__builtin_labs:
case Builtin::BI__builtin_llabs: {
- // X < 0 ? -X : X
- // The negation has 'nsw' because abs of INT_MIN is undefined.
- Value *ArgValue = EmitScalarExpr(E->getArg(0));
- Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg");
- Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType());
- Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
- Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
+ bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
+
+ Value *Result;
+ switch (getLangOpts().getSignedOverflowBehavior()) {
+ case LangOptions::SOB_Defined:
+ Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
+ break;
+ case LangOptions::SOB_Undefined:
+ if (!SanitizeOverflow) {
+ Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
+ break;
+ }
+ [[fallthrough]];
+ case LangOptions::SOB_Trapping:
+ // TODO: Somehow handle the corner case when the address of abs is taken.
+ Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
+ break;
+ }
return RValue::get(Result);
}
case Builtin::BI__builtin_complex: {
@@ -3146,6 +3463,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
ConvertType(E->getType())));
}
+ case Builtin::BI__builtin_issignaling: {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
+ Value *V = EmitScalarExpr(E->getArg(0));
+ return RValue::get(
+ Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
+ ConvertType(E->getType())));
+ }
+
case Builtin::BI__builtin_isinf: {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
Value *V = EmitScalarExpr(E->getArg(0));
@@ -3180,6 +3505,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
ConvertType(E->getType())));
}
+ case Builtin::BI__builtin_issubnormal: {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
+ Value *V = EmitScalarExpr(E->getArg(0));
+ return RValue::get(
+ Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
+ ConvertType(E->getType())));
+ }
+
+ case Builtin::BI__builtin_iszero: {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
+ Value *V = EmitScalarExpr(E->getArg(0));
+ return RValue::get(
+ Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
+ ConvertType(E->getType())));
+ }
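All three of the new classification builtins funnel into a single @llvm.is.fpclass call with the matching FPClassTest mask (fcSNan, fcSubnormal, fcZero); at the source level:

    int classify(double x) {
      return (__builtin_issignaling(x) << 2) |
             (__builtin_issubnormal(x) << 1) |
              __builtin_iszero(x);
    }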
+
case Builtin::BI__builtin_isfpclass: {
Expr::EvalResult Result;
if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
@@ -3237,6 +3578,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_elementwise_pow: {
return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow));
}
+ case Builtin::BI__builtin_elementwise_bitreverse:
+ return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::bitreverse,
+ "elt.bitreverse"));
case Builtin::BI__builtin_elementwise_cos:
return RValue::get(
emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos"));
@@ -3514,6 +3858,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}
+ // An alloca will always return a pointer to the alloca (stack) address
+ // space. This address space need not be the same as the AST / Language
+ // default (e.g. in C / C++ auto vars are in the generic address space). At
+ // the AST level this is handled within CreateTempAlloca et al., but for the
+ // builtin / dynamic alloca we have to handle it here. We use an explicit cast
+  // instead of passing an AS to CreateAlloca so as not to inhibit optimization.
case Builtin::BIalloca:
case Builtin::BI_alloca:
case Builtin::BI__builtin_alloca_uninitialized:
@@ -3529,6 +3879,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
AI->setAlignment(SuitableAlignmentInBytes);
if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
+ LangAS AAS = getASTAllocaAddressSpace();
+ LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
+ if (AAS != EAS) {
+ llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
+ return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
+ EAS, Ty));
+ }
return RValue::get(AI);
}
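// Illustrative effect of the address-space handling above, assuming an
// AMDGPU-like target where allocas live in addrspace(5) while the C/C++
// pointer type is generic (addrspace(0)):
//
//   void *p = __builtin_alloca(n);
//     %a = alloca i8, i64 %n, align 16, addrspace(5)
//     %p = addrspacecast ptr addrspace(5) %a to ptr
//
// When the two address spaces already agree (most CPU targets), no cast is
// emitted and the alloca is returned directly.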
@@ -3544,6 +3901,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
AI->setAlignment(AlignmentInBytes);
if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
initializeAlloca(*this, AI, Size, AlignmentInBytes);
+ LangAS AAS = getASTAllocaAddressSpace();
+ LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
+ if (AAS != EAS) {
+ llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
+ return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
+ EAS, Ty));
+ }
return RValue::get(AI);
}
@@ -3556,6 +3920,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
return RValue::get(nullptr);
}
+
+ case Builtin::BIbcopy:
+ case Builtin::BI__builtin_bcopy: {
+ Address Src = EmitPointerWithAlignment(E->getArg(0));
+ Address Dest = EmitPointerWithAlignment(E->getArg(1));
+ Value *SizeVal = EmitScalarExpr(E->getArg(2));
+ EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(),
+ E->getArg(0)->getExprLoc(), FD, 0);
+ EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(1)->getType(),
+ E->getArg(1)->getExprLoc(), FD, 1);
+ Builder.CreateMemMove(Dest, Src, SizeVal, false);
+ return RValue::get(Dest.getPointer());
+ }
+
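// Usage sketch: bcopy takes (src, dest, len), the reverse of memmove's
// (dest, src, len), and must tolerate overlapping buffers, hence
// CreateMemMove rather than CreateMemCpy:
//
//   bcopy(src, dst, n);
//     call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)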
case Builtin::BImemcpy:
case Builtin::BI__builtin_memcpy:
case Builtin::BImempcpy:
@@ -3563,10 +3941,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Address Dest = EmitPointerWithAlignment(E->getArg(0));
Address Src = EmitPointerWithAlignment(E->getArg(1));
Value *SizeVal = EmitScalarExpr(E->getArg(2));
- EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc(), FD, 0);
- EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
- E->getArg(1)->getExprLoc(), FD, 1);
+ EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+ EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
Builder.CreateMemCpy(Dest, Src, SizeVal, false);
if (BuiltinID == Builtin::BImempcpy ||
BuiltinID == Builtin::BI__builtin_mempcpy)
@@ -3581,10 +3957,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Address Src = EmitPointerWithAlignment(E->getArg(1));
uint64_t Size =
E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
- EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc(), FD, 0);
- EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
- E->getArg(1)->getExprLoc(), FD, 1);
+ EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+ EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
Builder.CreateMemCpyInline(Dest, Src, Size);
return RValue::get(nullptr);
}
@@ -3641,10 +4015,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Address Dest = EmitPointerWithAlignment(E->getArg(0));
Address Src = EmitPointerWithAlignment(E->getArg(1));
Value *SizeVal = EmitScalarExpr(E->getArg(2));
- EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
- E->getArg(0)->getExprLoc(), FD, 0);
- EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
- E->getArg(1)->getExprLoc(), FD, 1);
+ EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
+ EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
Builder.CreateMemMove(Dest, Src, SizeVal, false);
return RValue::get(Dest.getPointer());
}
@@ -3906,8 +4278,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.CreateStore(FrameAddr, Buf);
// Store the stack pointer to the setjmp buffer.
- Value *StackAddr =
- Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
+ Value *StackAddr = Builder.CreateStackSave();
+ assert(Buf.getPointer()->getType() == StackAddr->getType());
+
Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
Builder.CreateStore(StackAddr, StackSaveSlot);
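// For orientation (hedged sketch): llvm.eh.sjlj.setjmp uses a five-word
// buffer in which slot 0 holds the frame address (stored earlier in this
// block) and slot 2 holds the stack pointer (stored just above); the backend
// fills in the resume address. With opaque pointers, CreateStackSave returns
// a plain `ptr`, which the new assert checks against the buffer's type:
//
//   %fp = call ptr @llvm.frameaddress.p0(i32 0)   ; -> buf[0]
//   %sp = call ptr @llvm.stacksave.p0()           ; -> buf[2]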
@@ -3917,7 +4290,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
case Builtin::BI__builtin_longjmp: {
Value *Buf = EmitScalarExpr(E->getArg(0));
- Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
// Call LLVM's EH longjmp, which is lightweight.
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
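// Many of the deletions in this patch, like the bitcast removed above, fall
// out of LLVM's opaque-pointer migration: with a single `ptr` type there is
// nothing left to bitcast. Roughly:
//
//   before:  %b = bitcast i32* %buf to i8*
//            call void @llvm.eh.sjlj.longjmp(i8* %b)
//   after:   call void @llvm.eh.sjlj.longjmp(ptr %buf)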
@@ -4080,14 +4452,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__sync_lock_release_4:
case Builtin::BI__sync_lock_release_8:
case Builtin::BI__sync_lock_release_16: {
- Value *Ptr = CheckAtomicAlignment(*this, E);
+ Address Ptr = CheckAtomicAlignment(*this, E);
QualType ElTy = E->getArg(0)->getType()->getPointeeType();
- CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
- llvm::Type *ITy =
- llvm::IntegerType::get(getLLVMContext(), StoreSize.getQuantity() * 8);
+
+ llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
+ getContext().getTypeSize(ElTy));
llvm::StoreInst *Store =
- Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
- StoreSize);
+ Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
Store->setAtomic(llvm::AtomicOrdering::Release);
return RValue::get(nullptr);
}
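// Resulting IR sketch for a 4-byte flag (illustrative): the integer width
// now comes straight from getTypeSize, and the alignment travels with the
// checked Address instead of being hand-computed:
//
//   __sync_lock_release(&flag);
//     store atomic i32 0, ptr %flag release, align 4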
@@ -4138,7 +4509,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
bool Volatile =
PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
- Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Address Ptr =
+ EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
+
Value *NewVal = Builder.getInt8(1);
Value *Order = EmitScalarExpr(E->getArg(1));
if (isa<llvm::ConstantInt>(Order)) {
@@ -4659,7 +5032,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::IntegerType *IntType = IntegerType::get(
getLLVMContext(), getContext().getTypeSize(E->getType()));
- llvm::Value *Destination = EmitScalarExpr(E->getArg(0));
+ Address DestAddr = CheckAtomicAlignment(*this, E);
llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
RTy = Exchange->getType();
@@ -4672,7 +5045,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
- auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+ auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
Ordering, Ordering);
Result->setVolatile(true);
@@ -4784,7 +5157,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__GetExceptionInfo: {
if (llvm::GlobalVariable *GV =
CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
- return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
+ return RValue::get(GV);
break;
}
@@ -4834,8 +5207,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// Type of the generic packet parameter.
unsigned GenericAS =
getContext().getTargetAddressSpace(LangAS::opencl_generic);
- llvm::Type *I8PTy = llvm::PointerType::get(
- llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
+ llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
// Testing which overloaded version we should generate the call for.
if (2U == E->getNumArgs()) {
@@ -4980,11 +5352,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BIto_local:
case Builtin::BIto_private: {
auto Arg0 = EmitScalarExpr(E->getArg(0));
- auto NewArgT = llvm::PointerType::get(Int8Ty,
- CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
- auto NewRetT = llvm::PointerType::get(Int8Ty,
- CGM.getContext().getTargetAddressSpace(
- E->getType()->getPointeeType().getAddressSpace()));
+ auto NewArgT = llvm::PointerType::get(
+ getLLVMContext(),
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
+ auto NewRetT = llvm::PointerType::get(
+ getLLVMContext(),
+ CGM.getContext().getTargetAddressSpace(
+ E->getType()->getPointeeType().getAddressSpace()));
auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
llvm::Value *NewArg;
if (Arg0->getType()->getPointerAddressSpace() !=
@@ -5006,7 +5380,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
unsigned NumArgs = E->getNumArgs();
llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
- llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+ llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
getContext().getTargetAddressSpace(LangAS::opencl_generic));
llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
@@ -5050,7 +5424,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
-> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
llvm::APInt ArraySize(32, NumArgs - First);
QualType SizeArrayTy = getContext().getConstantArrayType(
- getContext().getSizeType(), ArraySize, nullptr, ArrayType::Normal,
+ getContext().getSizeType(), ArraySize, nullptr,
+ ArraySizeModifier::Normal,
/*IndexTypeQuals=*/0);
auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
llvm::Value *TmpPtr = Tmp.getPointer();
@@ -5184,7 +5559,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
// OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
// parameter.
case Builtin::BIget_kernel_work_group_size: {
- llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+ llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
getContext().getTargetAddressSpace(LangAS::opencl_generic));
auto Info =
CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
@@ -5199,7 +5574,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
{Kernel, Arg}));
}
case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
- llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+ llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
getContext().getTargetAddressSpace(LangAS::opencl_generic));
auto Info =
CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
@@ -5215,7 +5590,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
- llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+ llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
getContext().getTargetAddressSpace(LangAS::opencl_generic));
LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
llvm::Value *NDRange = NDRangeL.getAddress(*this).getPointer();
@@ -5367,12 +5742,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Address DestAddr = EmitMSVAListRef(E->getArg(0));
Address SrcAddr = EmitMSVAListRef(E->getArg(1));
- llvm::Type *BPP = Int8PtrPtrTy;
-
- DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
- Int8PtrTy, DestAddr.getAlignment());
- SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
- Int8PtrTy, SrcAddr.getAlignment());
+ DestAddr = DestAddr.withElementType(Int8PtrTy);
+ SrcAddr = SrcAddr.withElementType(Int8PtrTy);
Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
@@ -5441,18 +5812,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::FunctionType *FTy = F->getFunctionType();
for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
- Value *ArgValue;
- // If this is a normal argument, just emit it as a scalar.
- if ((ICEArguments & (1 << i)) == 0) {
- ArgValue = EmitScalarExpr(E->getArg(i));
- } else {
- // If this is required to be a constant, constant fold it so that we
- // know that the generated intrinsic gets a ConstantInt.
- ArgValue = llvm::ConstantInt::get(
- getLLVMContext(),
- *E->getArg(i)->getIntegerConstantExpr(getContext()));
- }
-
+ Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
// If the intrinsic arg type is different from the builtin arg type
// we need to do a bit cast.
llvm::Type *PTy = FTy->getParamType(i);
@@ -5541,6 +5901,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
}
+ if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
+ return EmitHipStdParUnsupportedBuiltin(this, FD);
+
ErrorUnsupported(E, "builtin function");
// Unknown builtin, for now just dump it out and return undef.
@@ -5551,6 +5914,16 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch) {
+ // When compiling in HipStdPar mode we have to be conservative in rejecting
+ // target specific features in the FE, and defer the possible error to the
+ // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
+ // referenced by an accelerator executable function, we emit an error.
+ // Returning nullptr here leads to the builtin being handled in
+ // EmitHipStdParUnsupportedBuiltin.
+ if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
+ Arch != CGF->getTarget().getTriple().getArch())
+ return nullptr;
+
switch (Arch) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
@@ -5588,9 +5961,6 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
- case llvm::Triple::loongarch32:
- case llvm::Triple::loongarch64:
- return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E);
default:
return nullptr;
}
@@ -6313,13 +6683,21 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
NEONMAP0(vrndi_v),
NEONMAP0(vrndiq_v),
NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
@@ -7231,13 +7609,9 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vld1q_x3_v:
case NEON::BI__builtin_neon_vld1_x4_v:
case NEON::BI__builtin_neon_vld1q_x4_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
+ llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vld2_v:
@@ -7256,8 +7630,6 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
Value *Align = getAlignmentValue32(PtrOp1);
Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vld1_dup_v:
@@ -7281,8 +7653,6 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
Ops.push_back(getAlignmentValue32(PtrOp1));
Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vmovl_v: {
@@ -7461,16 +7831,15 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
case NEON::BI__builtin_neon_vst1q_x3_v:
case NEON::BI__builtin_neon_vst1_x4_v:
case NEON::BI__builtin_neon_vst1q_x4_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType());
// TODO: Currently in AArch32 mode the pointer operand comes first, whereas
// in AArch64 it comes last. We may want to stick to one or the other.
if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
Arch == llvm::Triple::aarch64_32) {
- llvm::Type *Tys[2] = { VTy, PTy };
+ llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
}
- llvm::Type *Tys[2] = { PTy, VTy };
+ llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
}
case NEON::BI__builtin_neon_vsubhn_v: {
@@ -7492,7 +7861,6 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
}
case NEON::BI__builtin_neon_vtrn_v:
case NEON::BI__builtin_neon_vtrnq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Value *SV = nullptr;
@@ -7520,7 +7888,6 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
}
case NEON::BI__builtin_neon_vuzp_v:
case NEON::BI__builtin_neon_vuzpq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Value *SV = nullptr;
@@ -7543,7 +7910,6 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
}
case NEON::BI__builtin_neon_vzip_v:
case NEON::BI__builtin_neon_vzipq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Value *SV = nullptr;
@@ -7747,6 +8113,26 @@ enum SpecialRegisterAccessKind {
Write,
};
+// Generates the IR for __builtin_read_exec_*.
+// Lowers the builtin to the amdgcn_ballot intrinsic.
+static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
+ llvm::Type *RegisterType,
+ llvm::Type *ValueType, bool isExecHi) {
+ CodeGen::CGBuilderTy &Builder = CGF.Builder;
+ CodeGen::CodeGenModule &CGM = CGF.CGM;
+
+ Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
+ llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
+
+ if (isExecHi) {
+ Value *Rt2 = Builder.CreateLShr(Call, 32);
+ Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
+ return Rt2;
+ }
+
+ return Call;
+}
+
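// Illustrative lowering on a wave64 target (RegisterType == i64):
//
//   unsigned hi = __builtin_amdgcn_read_exec_hi();
//     %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 true)
//     %shift  = lshr i64 %ballot, 32
//     %hi     = trunc i64 %shift to i32
//
// __builtin_amdgcn_read_exec returns the 64-bit ballot unmodified, and the
// _lo variant uses a 32-bit ballot directly, so both skip the shift/trunc.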
// Generates the IR for the read/write special register builtin.
// ValueType is the type of the value that is to be written or read;
// RegisterType is the type of the register being written to or read from.
@@ -8031,8 +8417,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
}
Value *LdPtr = EmitScalarExpr(E->getArg(0));
- Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
- "ldrexd");
+ Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
Value *Val0 = Builder.CreateExtractValue(Val, 1);
Value *Val1 = Builder.CreateExtractValue(Val, 0);
@@ -8053,12 +8438,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
llvm::Type *RealResTy = ConvertType(Ty);
llvm::Type *IntTy =
llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
- llvm::Type *PtrTy = llvm::PointerType::getUnqual(getLLVMContext());
Function *F = CGM.getIntrinsic(
BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
: Intrinsic::arm_ldrex,
- PtrTy);
+ UnqualPtrTy);
CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
Val->addParamAttr(
0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
@@ -8091,7 +8475,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
Value *Arg0 = Builder.CreateExtractValue(Val, 0);
Value *Arg1 = Builder.CreateExtractValue(Val, 1);
- Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
+ Value *StPtr = EmitScalarExpr(E->getArg(1));
return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
}
@@ -8307,15 +8691,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
}
}
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- } else {
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(),
- *E->getArg(i)->getIntegerConstantExpr(getContext())));
- }
+ Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
switch (BuiltinID) {
@@ -9081,6 +9457,11 @@ static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
// the elements of the specified datatype.
Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
llvm::ScalableVectorType *VTy) {
+
+ if (isa<TargetExtType>(Pred->getType()) &&
+ cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
+ return Pred;
+
auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
if (Pred->getType() == RTy)
return Pred;
@@ -9116,13 +9497,6 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
auto *OverloadedTy =
llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
- // At the ACLE level there's only one predicate type, svbool_t, which is
- // mapped to <n x 16 x i1>. However, this might be incompatible with the
- // actual type being loaded. For example, when loading doubles (i64) the
- // predicated should be <n x 2 x i1> instead. At the IR level the type of
- // the predicate and the data being loaded must match. Cast accordingly.
- Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
-
Function *F = nullptr;
if (Ops[1]->getType()->isVectorTy())
// This is the "vector base, scalar offset" case. In order to uniquely
@@ -9136,6 +9510,16 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
// intrinsic.
F = CGM.getIntrinsic(IntID, OverloadedTy);
+ // At the ACLE level there's only one predicate type, svbool_t, which is
+ // mapped to <n x 16 x i1>. However, this might be incompatible with the
+ // actual type being loaded. For example, when loading doubles (i64) the
+ // predicate should be <n x 2 x i1> instead. At the IR level the type of
+ // the predicate and the data being loaded must match. Cast to the type
+ // expected by the intrinsic. The intrinsic itself should be defined in
+ // a way that enforces relations between parameter types.
+ Ops[0] = EmitSVEPredicateCast(
+ Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
+
// Pass 0 when the offset is missing. This can only be applied when using
// the "vector base" addressing mode for which ACLE allows no offset. The
// corresponding LLVM IR always requires an offset.
@@ -9200,8 +9584,11 @@ Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
// mapped to <n x 16 x i1>. However, this might be incompatible with the
// actual type being stored. For example, when storing doubles (i64) the
// predicate should be <n x 2 x i1> instead. At the IR level the type of
- // the predicate and the data being stored must match. Cast accordingly.
- Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
+ // the predicate and the data being stored must match. Cast to the type
+ // expected by the intrinsic. The intrinsic itself should be defined in
+ // a way that enforces relations between parameter types.
+ Ops[1] = EmitSVEPredicateCast(
+ Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
// For "vector base, scalar index" scale the index so that it becomes a
// scalar offset.
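// Hedged example of the cast now being derived from the intrinsic signature:
// for a gather/scatter of doubles the predicate parameter is
// <vscale x 2 x i1>, so the incoming svbool_t is narrowed via
//
//   %pg2 = call <vscale x 2 x i1>
//       @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)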
@@ -9251,18 +9638,23 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value*> &Ops,
unsigned IntID) {
llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
- auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
- auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
unsigned N;
switch (IntID) {
case Intrinsic::aarch64_sve_ld2_sret:
+ case Intrinsic::aarch64_sve_ld1_pn_x2:
+ case Intrinsic::aarch64_sve_ldnt1_pn_x2:
+ case Intrinsic::aarch64_sve_ld2q_sret:
N = 2;
break;
case Intrinsic::aarch64_sve_ld3_sret:
+ case Intrinsic::aarch64_sve_ld3q_sret:
N = 3;
break;
case Intrinsic::aarch64_sve_ld4_sret:
+ case Intrinsic::aarch64_sve_ld1_pn_x4:
+ case Intrinsic::aarch64_sve_ldnt1_pn_x4:
+ case Intrinsic::aarch64_sve_ld4q_sret:
N = 4;
break;
default:
@@ -9271,14 +9663,13 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
auto RetTy = llvm::VectorType::get(VTy->getElementType(),
VTy->getElementCount() * N);
- Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
- Value *BasePtr= Builder.CreateBitCast(Ops[1], VecPtrTy);
+ Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
+ Value *BasePtr = Ops[1];
// Does the load have an offset?
if (Ops.size() > 2)
BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
- BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
Function *F = CGM.getIntrinsic(IntID, {VTy});
Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
unsigned MinElts = VTy->getMinNumElements();
@@ -9295,18 +9686,23 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value*> &Ops,
unsigned IntID) {
llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
- auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
- auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
unsigned N;
switch (IntID) {
case Intrinsic::aarch64_sve_st2:
+ case Intrinsic::aarch64_sve_st1_pn_x2:
+ case Intrinsic::aarch64_sve_stnt1_pn_x2:
+ case Intrinsic::aarch64_sve_st2q:
N = 2;
break;
case Intrinsic::aarch64_sve_st3:
+ case Intrinsic::aarch64_sve_st3q:
N = 3;
break;
case Intrinsic::aarch64_sve_st4:
+ case Intrinsic::aarch64_sve_st1_pn_x4:
+ case Intrinsic::aarch64_sve_stnt1_pn_x4:
+ case Intrinsic::aarch64_sve_st4q:
N = 4;
break;
default:
@@ -9314,26 +9710,20 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
}
Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
- Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
+ Value *BasePtr = Ops[1];
// Does the store have an offset?
- if (Ops.size() > 3)
+ if (Ops.size() > (2 + N))
BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
- BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
- Value *Val = Ops.back();
-
// The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors; the
// tuple vector has already been split into parts, so forward them as-is.
SmallVector<llvm::Value*, 5> Operands;
- unsigned MinElts = VTy->getElementCount().getKnownMinValue();
- for (unsigned I = 0; I < N; ++I) {
- Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
- Operands.push_back(Builder.CreateExtractVector(VTy, Val, Idx));
- }
+ for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
+ Operands.push_back(Ops[I]);
Operands.append({Predicate, BasePtr});
-
Function *F = CGM.getIntrinsic(IntID, { VTy });
+
return Builder.CreateCall(F, Operands);
}
@@ -9388,7 +9778,7 @@ Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
llvm::Type *ReturnTy,
SmallVectorImpl<Value *> &Ops,
- unsigned BuiltinID,
+ unsigned IntrinsicID,
bool IsZExtReturn) {
QualType LangPTy = E->getArg(1)->getType();
llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
@@ -9397,28 +9787,46 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
// The vector type that is returned may be different from the
// eventual type loaded from memory.
auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
- auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
+ llvm::ScalableVectorType *MemoryTy = nullptr;
+ llvm::ScalableVectorType *PredTy = nullptr;
+ bool IsQuadLoad = false;
+ switch (IntrinsicID) {
+ case Intrinsic::aarch64_sve_ld1uwq:
+ case Intrinsic::aarch64_sve_ld1udq:
+ MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
+ PredTy = llvm::ScalableVectorType::get(
+ llvm::Type::getInt1Ty(getLLVMContext()), 1);
+ IsQuadLoad = true;
+ break;
+ default:
+ MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
+ PredTy = MemoryTy;
+ break;
+ }
- Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
+ Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
Value *BasePtr = Ops[1];
// Does the load have an offset?
if (Ops.size() > 2)
BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
- Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
+ Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
auto *Load =
cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
+ if (IsQuadLoad)
+ return Load;
+
return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
- : Builder.CreateSExt(Load, VectorTy);
+ : Builder.CreateSExt(Load, VectorTy);
}
Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
SmallVectorImpl<Value *> &Ops,
- unsigned BuiltinID) {
+ unsigned IntrinsicID) {
QualType LangPTy = E->getArg(1)->getType();
llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
LangPTy->castAs<PointerType>()->getPointeeType());
@@ -9428,17 +9836,34 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
- Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
+ auto PredTy = MemoryTy;
+ auto AddrMemoryTy = MemoryTy;
+ bool IsQuadStore = false;
+
+ switch (IntrinsicID) {
+ case Intrinsic::aarch64_sve_st1uwq:
+ case Intrinsic::aarch64_sve_st1udq:
+ AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
+ PredTy =
+ llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
+ IsQuadStore = true;
+ break;
+ default:
+ break;
+ }
+ Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
Value *BasePtr = Ops[1];
// Does the store have an offset?
if (Ops.size() == 4)
- BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
+ BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
// Last value is always the data
- llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);
+ Value *Val =
+ IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
- Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
+ Function *F =
+ CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
auto *Store =
cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
@@ -9446,59 +9871,49 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
return Store;
}
-Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
- llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false);
- return Builder.CreateAdd(Base, CastOffset, "tileslice");
-}
-
-Value *CodeGenFunction::EmitSMELd1St1(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned IntID) {
- Ops[3] = EmitSVEPredicateCast(
- Ops[3], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
+ Ops[2] = EmitSVEPredicateCast(
+ Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
SmallVector<Value *> NewOps;
- NewOps.push_back(Ops[3]);
+ NewOps.push_back(Ops[2]);
- llvm::Value *BasePtr = Ops[4];
+ llvm::Value *BasePtr = Ops[3];
// If the intrinsic contains the vnum parameter, multiply it by the vector
// size in bytes.
- if (Ops.size() == 6) {
+ if (Ops.size() == 5) {
Function *StreamingVectorLength =
CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
llvm::Value *StreamingVectorLengthCall =
Builder.CreateCall(StreamingVectorLength);
llvm::Value *Mulvl =
- Builder.CreateMul(StreamingVectorLengthCall, Ops[5], "mulvl");
+ Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
// The type of the ptr parameter is void *, so use Int8Ty here.
- BasePtr = Builder.CreateGEP(Int8Ty, Ops[4], Mulvl);
+ BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
}
NewOps.push_back(BasePtr);
NewOps.push_back(Ops[0]);
- NewOps.push_back(EmitTileslice(Ops[2], Ops[1]));
+ NewOps.push_back(Ops[1]);
Function *F = CGM.getIntrinsic(IntID);
return Builder.CreateCall(F, NewOps);
}
-Value *CodeGenFunction::EmitSMEReadWrite(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned IntID) {
auto *VecTy = getSVEType(TypeFlags);
Function *F = CGM.getIntrinsic(IntID, VecTy);
- if (TypeFlags.isReadZA()) {
+ if (TypeFlags.isReadZA())
Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
- Ops[3] = EmitTileslice(Ops[4], Ops[3]);
- Ops.erase(&Ops[4]);
- } else if (TypeFlags.isWriteZA()) {
- Ops[1] = EmitTileslice(Ops[2], Ops[1]);
- Ops[2] = EmitSVEPredicateCast(Ops[3], VecTy);
- Ops.erase(&Ops[3]);
- }
+ else if (TypeFlags.isWriteZA())
+ Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
return Builder.CreateCall(F, Ops);
}
-Value *CodeGenFunction::EmitSMEZero(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned IntID) {
// The svzero_za() intrinsic zeros the entire ZA tile and has no parameters.
@@ -9508,18 +9923,13 @@ Value *CodeGenFunction::EmitSMEZero(SVETypeFlags TypeFlags,
return Builder.CreateCall(F, Ops);
}
-Value *CodeGenFunction::EmitSMELdrStr(SVETypeFlags TypeFlags,
+Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned IntID) {
- Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
- llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
- llvm::Value *MulVL = Builder.CreateMul(
- CntsbCall,
- Builder.getInt64(cast<llvm::ConstantInt>(Ops[1])->getZExtValue()),
- "mulvl");
- Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL);
- Ops[0] = EmitTileslice(Ops[1], Ops[0]);
- Ops.erase(&Ops[1]);
+ if (Ops.size() == 2)
+ Ops.push_back(Builder.getInt32(0));
+ else
+ Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
Function *F = CGM.getIntrinsic(IntID, {});
return Builder.CreateCall(F, Ops);
}
@@ -9612,26 +10022,59 @@ Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
return Call;
}
-Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
+Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
+ // Multi-vector results should be combined into a single (wide) result
+ // vector.
+ auto *StructTy = dyn_cast<StructType>(Call->getType());
+ if (!StructTy)
+ return Call;
+
+ auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
+ if (!VTy)
+ return Call;
+ unsigned N = StructTy->getNumElements();
+
+ // We may need to emit a cast to an svbool_t
+ bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
+ unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
+
+ ScalableVectorType *WideVTy =
+ ScalableVectorType::get(VTy->getElementType(), MinElts * N);
+ Value *Ret = llvm::PoisonValue::get(WideVTy);
+ for (unsigned I = 0; I < N; ++I) {
+ Value *SRet = Builder.CreateExtractValue(Call, I);
+ assert(SRet->getType() == VTy && "Unexpected type for result value");
+ Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
+
+ if (IsPredTy)
+ SRet = EmitSVEPredicateCast(
+ SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
+
+ Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
+ }
+ Call = Ret;
+
+ return Call;
+}
+
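// Illustrative example: a two-vector sret such as
// { <vscale x 4 x i32>, <vscale x 4 x i32> } is flattened into a single
// <vscale x 8 x i32> by extractvalue plus llvm.vector.insert at element
// offsets 0 and 4; i1 parts are first widened back to the 16-lane svbool_t
// form before insertion.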
+void CodeGenFunction::GetAArch64SVEProcessedOperands(
+ unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
+ SVETypeFlags TypeFlags) {
// Find out if any arguments are required to be integer constant expressions.
unsigned ICEArguments = 0;
ASTContext::GetBuiltinTypeError Error;
getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
assert(Error == ASTContext::GE_None && "Should not codegen an error");
- llvm::Type *Ty = ConvertType(E->getType());
- if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
- BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
- Value *Val = EmitScalarExpr(E->getArg(0));
- return EmitSVEReinterpret(Val, Ty);
- }
+ // Tuple set/get only requires one insert/extract vector, which is
+ // created by EmitSVETupleSetOrGet.
+ bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
- llvm::SmallVector<Value *, 4> Ops;
for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
- if ((ICEArguments & (1 << i)) == 0)
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- else {
+ bool IsICE = ICEArguments & (1 << i);
+ Value *Arg = EmitScalarExpr(E->getArg(i));
+
+ if (IsICE) {
// If this is required to be a constant, constant fold it so that we know
// that the generated intrinsic gets a ConstantInt.
std::optional<llvm::APSInt> Result =
@@ -9643,12 +10086,49 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
// immediate requires more than a handful of bits.
*Result = Result->extOrTrunc(32);
Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
+ continue;
+ }
+
+ if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
+ Ops.push_back(Arg);
+ continue;
}
+
+ auto *VTy = cast<ScalableVectorType>(Arg->getType());
+ unsigned MinElts = VTy->getMinNumElements();
+ bool IsPred = VTy->getElementType()->isIntegerTy(1);
+ unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
+
+ if (N == 1) {
+ Ops.push_back(Arg);
+ continue;
+ }
+
+ for (unsigned I = 0; I < N; ++I) {
+ Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
+ auto *NewVTy =
+ ScalableVectorType::get(VTy->getElementType(), MinElts / N);
+ Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
+ }
+ }
+}
+
+Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
+ llvm::Type *Ty = ConvertType(E->getType());
+ if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
+ BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
+ Value *Val = EmitScalarExpr(E->getArg(0));
+ return EmitSVEReinterpret(Val, Ty);
}
auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
AArch64SVEIntrinsicsProvenSorted);
+
+ llvm::SmallVector<Value *, 4> Ops;
SVETypeFlags TypeFlags(Builtin->TypeModifier);
+ GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
+
if (TypeFlags.isLoad())
return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
TypeFlags.isZExtReturn());
@@ -9662,14 +10142,14 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isGatherPrefetch())
return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
- else if (TypeFlags.isStructLoad())
- return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
- else if (TypeFlags.isStructStore())
- return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isStructLoad())
+ return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+ else if (TypeFlags.isStructStore())
+ return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
- return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
+ return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
else if (TypeFlags.isTupleCreate())
- return EmitSVETupleCreate(TypeFlags, Ty, Ops);
+ return EmitSVETupleCreate(TypeFlags, Ty, Ops);
else if (TypeFlags.isUndef())
return UndefValue::get(Ty);
else if (Builtin->LLVMIntrinsic != 0) {
@@ -9725,13 +10205,55 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
if (PredTy->getScalarType()->isIntegerTy(1))
Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
- return Call;
+ return FormSVEBuiltinResult(Call);
}
switch (BuiltinID) {
default:
return nullptr;
+ case SVE::BI__builtin_sve_svreinterpret_b: {
+ auto SVCountTy =
+ llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
+ Function *CastFromSVCountF =
+ CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
+ return Builder.CreateCall(CastFromSVCountF, Ops[0]);
+ }
+ case SVE::BI__builtin_sve_svreinterpret_c: {
+ auto SVCountTy =
+ llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
+ Function *CastToSVCountF =
+ CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
+ return Builder.CreateCall(CastToSVCountF, Ops[0]);
+ }
+
+ case SVE::BI__builtin_sve_svpsel_lane_b8:
+ case SVE::BI__builtin_sve_svpsel_lane_b16:
+ case SVE::BI__builtin_sve_svpsel_lane_b32:
+ case SVE::BI__builtin_sve_svpsel_lane_b64:
+ case SVE::BI__builtin_sve_svpsel_lane_c8:
+ case SVE::BI__builtin_sve_svpsel_lane_c16:
+ case SVE::BI__builtin_sve_svpsel_lane_c32:
+ case SVE::BI__builtin_sve_svpsel_lane_c64: {
+ bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
+ assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
+ "aarch64.svcount")) &&
+ "Unexpected TargetExtType");
+ auto SVCountTy =
+ llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
+ Function *CastFromSVCountF =
+ CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
+ Function *CastToSVCountF =
+ CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
+
+ auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
+ Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
+ llvm::Value *Ops0 =
+ IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
+ llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
+ llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
+ return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
+ }
case SVE::BI__builtin_sve_svmov_b_z: {
// svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
SVETypeFlags TypeFlags(Builtin->TypeModifier);
@@ -9853,6 +10375,13 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
case SVE::BI__builtin_sve_svpfalse_b:
return ConstantInt::getFalse(Ty);
+ case SVE::BI__builtin_sve_svpfalse_c: {
+ auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
+ Function *CastToSVCountF =
+ CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
+ return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
+ }
+
case SVE::BI__builtin_sve_svlen_bf16:
case SVE::BI__builtin_sve_svlen_f16:
case SVE::BI__builtin_sve_svlen_f32:
@@ -9888,13 +10417,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
case SVE::BI__builtin_sve_svtbl2_f64: {
SVETypeFlags TF(Builtin->TypeModifier);
auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
- Value *V0 = Builder.CreateExtractVector(VTy, Ops[0],
- ConstantInt::get(CGM.Int64Ty, 0));
- unsigned MinElts = VTy->getMinNumElements();
- Value *V1 = Builder.CreateExtractVector(
- VTy, Ops[0], ConstantInt::get(CGM.Int64Ty, MinElts));
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
- return Builder.CreateCall(F, {V0, V1, Ops[1]});
+ return Builder.CreateCall(F, Ops);
}
case SVE::BI__builtin_sve_svset_neonq_s8:
@@ -9952,35 +10476,13 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
- // Find out if any arguments are required to be integer constant expressions.
- unsigned ICEArguments = 0;
- ASTContext::GetBuiltinTypeError Error;
- getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
- assert(Error == ASTContext::GE_None && "Should not codegen an error");
-
- llvm::Type *Ty = ConvertType(E->getType());
- llvm::SmallVector<Value *, 4> Ops;
- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
- if ((ICEArguments & (1 << i)) == 0)
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- else {
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- std::optional<llvm::APSInt> Result =
- E->getArg(i)->getIntegerConstantExpr(getContext());
- assert(Result && "Expected argument to be a constant");
-
- // Immediates for SVE llvm intrinsics are always 32bit. We can safely
- // truncate because the immediate has been range checked and no valid
- // immediate requires more than a handful of bits.
- *Result = Result->extOrTrunc(32);
- Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
- }
- }
-
auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
AArch64SMEIntrinsicsProvenSorted);
+
+ llvm::SmallVector<Value *, 4> Ops;
SVETypeFlags TypeFlags(Builtin->TypeModifier);
+ GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
+
if (TypeFlags.isLoad() || TypeFlags.isStore())
return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
@@ -9989,23 +10491,28 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
BuiltinID == SME::BI__builtin_sme_svzero_za)
return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
- BuiltinID == SME::BI__builtin_sme_svstr_vnum_za)
+ BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
+ BuiltinID == SME::BI__builtin_sme_svldr_za ||
+ BuiltinID == SME::BI__builtin_sme_svstr_za)
return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
- else if (Builtin->LLVMIntrinsic != 0) {
- // Predicates must match the main datatype.
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
- if (PredTy->getElementType()->isIntegerTy(1))
- Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
- Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
- getSVEOverloadTypes(TypeFlags, Ty, Ops));
- Value *Call = Builder.CreateCall(F, Ops);
- return Call;
- }
+ // Should not happen!
+ if (Builtin->LLVMIntrinsic == 0)
+ return nullptr;
- /// Should not happen
- return nullptr;
+ // Predicates must match the main datatype.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
+ if (PredTy->getElementType()->isIntegerTy(1))
+ Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
+
+ Function *F =
+ TypeFlags.isOverloadNone()
+ ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
+ : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
+ Value *Call = Builder.CreateCall(F, Ops);
+
+ return FormSVEBuiltinResult(Call);
}
Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
@@ -10210,8 +10717,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
: Intrinsic::aarch64_ldxp);
Value *LdPtr = EmitScalarExpr(E->getArg(0));
- Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
- "ldxp");
+ Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
Value *Val0 = Builder.CreateExtractValue(Val, 1);
Value *Val1 = Builder.CreateExtractValue(Val, 0);
@@ -10231,13 +10737,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type *RealResTy = ConvertType(Ty);
llvm::Type *IntTy =
llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
- llvm::Type *PtrTy = llvm::PointerType::getUnqual(getLLVMContext());
Function *F =
CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
? Intrinsic::aarch64_ldaxr
: Intrinsic::aarch64_ldxr,
- PtrTy);
+ UnqualPtrTy);
CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
Val->addParamAttr(
0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
@@ -10268,8 +10773,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *Arg0 = Builder.CreateExtractValue(Val, 0);
Value *Arg1 = Builder.CreateExtractValue(Val, 1);
- Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
- Int8PtrTy);
+ Value *StPtr = EmitScalarExpr(E->getArg(1));
return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
}
@@ -10578,8 +11082,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
BuiltinID == AArch64::BI__writex18word ||
BuiltinID == AArch64::BI__writex18dword ||
BuiltinID == AArch64::BI__writex18qword) {
- llvm::Type *IntTy = ConvertType(E->getArg(1)->getType());
-
// Read x18 as i8*
LLVMContext &Context = CGM.getLLVMContext();
llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
@@ -10588,12 +11090,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Function *F =
CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
llvm::Value *X18 = Builder.CreateCall(F, Metadata);
- X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0));
+ X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
// Store val at x18 + offset
Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
- Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0));
Value *Val = EmitScalarExpr(E->getArg(1));
StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
return Store;
@@ -10613,16 +11114,79 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Function *F =
CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
llvm::Value *X18 = Builder.CreateCall(F, Metadata);
- X18 = Builder.CreateIntToPtr(X18, llvm::PointerType::get(Int8Ty, 0));
+ X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
// Load x18 + offset
Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
- Ptr = Builder.CreatePointerCast(Ptr, llvm::PointerType::get(IntTy, 0));
LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
return Load;
}
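// Usage sketch for the x18 intrinsics handled above (illustrative):
//
//   __writex18dword(0x10, v);   // x18 = llvm.read_register("x18"),
//                               // inttoptr to ptr, gep i8 +0x10,
//                               // store i32 %v, align 1
//   unsigned __int64 q = __readx18qword(0x18);  // same addressing, i64 load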
+ if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
+ BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
+ BuiltinID == AArch64::BI_CopyInt32FromFloat ||
+ BuiltinID == AArch64::BI_CopyInt64FromDouble) {
+ Value *Arg = EmitScalarExpr(E->getArg(0));
+ llvm::Type *RetTy = ConvertType(E->getType());
+ return Builder.CreateBitCast(Arg, RetTy);
+ }
+
+ if (BuiltinID == AArch64::BI_CountLeadingOnes ||
+ BuiltinID == AArch64::BI_CountLeadingOnes64 ||
+ BuiltinID == AArch64::BI_CountLeadingZeros ||
+ BuiltinID == AArch64::BI_CountLeadingZeros64) {
+ Value *Arg = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ArgType = Arg->getType();
+
+ if (BuiltinID == AArch64::BI_CountLeadingOnes ||
+ BuiltinID == AArch64::BI_CountLeadingOnes64)
+ Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
+
+ Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
+ Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
+
+ if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
+ BuiltinID == AArch64::BI_CountLeadingZeros64)
+ Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+ return Result;
+ }
+
+ if (BuiltinID == AArch64::BI_CountLeadingSigns ||
+ BuiltinID == AArch64::BI_CountLeadingSigns64) {
+ Value *Arg = EmitScalarExpr(E->getArg(0));
+
+ Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
+ ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
+ : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
+
+ Value *Result = Builder.CreateCall(F, Arg, "cls");
+ if (BuiltinID == AArch64::BI_CountLeadingSigns64)
+ Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+ return Result;
+ }
+
+ if (BuiltinID == AArch64::BI_CountOneBits ||
+ BuiltinID == AArch64::BI_CountOneBits64) {
+ Value *ArgValue = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ArgType = ArgValue->getType();
+ Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
+
+ Value *Result = Builder.CreateCall(F, ArgValue);
+ if (BuiltinID == AArch64::BI_CountOneBits64)
+ Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+ return Result;
+ }
+
+ if (BuiltinID == AArch64::BI__prefetch) {
+ Value *Address = EmitScalarExpr(E->getArg(0));
+ Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
+ Value *Locality = ConstantInt::get(Int32Ty, 3);
+ Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
+ Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
+ return Builder.CreateCall(F, {Address, RW, Locality, Data});
+ }
+
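// Illustrative lowering: MSVC's __prefetch(p) maps onto the generic prefetch
// intrinsic with fixed operands (read access, maximum locality, data cache):
//
//   call void @llvm.prefetch.p0(ptr %p, i32 0, i32 3, i32 1)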
// Handle MSVC intrinsics before argument evaluation to prevent double
// evaluation.
if (std::optional<MSVCIntrin> MsvcIntId =
@@ -10669,15 +11233,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
continue;
}
}
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- } else {
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(),
- *E->getArg(i)->getIntegerConstantExpr(getContext())));
- }
+ Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
@@ -10718,14 +11274,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vldrq_p128: {
llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
- llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
- Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
return Builder.CreateAlignedLoad(Int128Ty, Ptr,
CharUnits::fromQuantity(16));
}
case NEON::BI__builtin_neon_vstrq_p128: {
- llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
- Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
+ Value *Ptr = Ops[0];
return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
}
case NEON::BI__builtin_neon_vcvts_f32_u32:
@@ -11360,12 +11914,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case clang::AArch64::BI_InterlockedAdd: {
- Value *Arg0 = EmitScalarExpr(E->getArg(0));
- Value *Arg1 = EmitScalarExpr(E->getArg(1));
- AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
- AtomicRMWInst::Add, Arg0, Arg1,
- llvm::AtomicOrdering::SequentiallyConsistent);
- return Builder.CreateAdd(RMWI, Arg1);
+ Address DestAddr = CheckAtomicAlignment(*this, E);
+ Value *Val = EmitScalarExpr(E->getArg(1));
+ AtomicRMWInst *RMWI =
+ Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
+ llvm::AtomicOrdering::SequentiallyConsistent);
+ return Builder.CreateAdd(RMWI, Val);
}
}
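// _InterlockedAdd returns the post-add value, while atomicrmw yields the
// prior value, hence the trailing CreateAdd. Illustrative IR:
//
//   %old = atomicrmw add ptr %dest, i32 %val seq_cst, align 4
//   %new = add i32 %old, %val   ; value returned to the caller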
@@ -11679,25 +12233,33 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
}
case NEON::BI__builtin_neon_vrnd32x_f32:
- case NEON::BI__builtin_neon_vrnd32xq_f32: {
+ case NEON::BI__builtin_neon_vrnd32xq_f32:
+ case NEON::BI__builtin_neon_vrnd32x_f64:
+ case NEON::BI__builtin_neon_vrnd32xq_f64: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frint32x;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
}
case NEON::BI__builtin_neon_vrnd32z_f32:
- case NEON::BI__builtin_neon_vrnd32zq_f32: {
+ case NEON::BI__builtin_neon_vrnd32zq_f32:
+ case NEON::BI__builtin_neon_vrnd32z_f64:
+ case NEON::BI__builtin_neon_vrnd32zq_f64: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frint32z;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
}
case NEON::BI__builtin_neon_vrnd64x_f32:
- case NEON::BI__builtin_neon_vrnd64xq_f32: {
+ case NEON::BI__builtin_neon_vrnd64xq_f32:
+ case NEON::BI__builtin_neon_vrnd64x_f64:
+ case NEON::BI__builtin_neon_vrnd64xq_f64: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frint64x;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
}
case NEON::BI__builtin_neon_vrnd64z_f32:
- case NEON::BI__builtin_neon_vrnd64zq_f32: {
+ case NEON::BI__builtin_neon_vrnd64zq_f32:
+ case NEON::BI__builtin_neon_vrnd64z_f64:
+ case NEON::BI__builtin_neon_vrnd64zq_f64: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frint64z;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
@@ -12243,19 +12805,15 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vld1_v:
case NEON::BI__builtin_neon_vld1q_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
}
case NEON::BI__builtin_neon_vst1_v:
case NEON::BI__builtin_neon_vst1q_v:
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
case NEON::BI__builtin_neon_vld1_lane_v:
case NEON::BI__builtin_neon_vld1q_lane_v: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ty = llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
PtrOp0.getAlignment());
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
@@ -12263,8 +12821,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vldap1_lane_s64:
case NEON::BI__builtin_neon_vldap1q_lane_s64: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Ty = llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
llvm::LoadInst *LI = Builder.CreateAlignedLoad(
VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
LI->setAtomic(llvm::AtomicOrdering::Acquire);
@@ -12274,8 +12830,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vld1_dup_v:
case NEON::BI__builtin_neon_vld1q_dup_v: {
Value *V = PoisonValue::get(Ty);
- Ty = llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
PtrOp0.getAlignment());
llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
@@ -12286,86 +12840,56 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vst1q_lane_v:
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- return Builder.CreateAlignedStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty),
- PtrOp0.getAlignment());
+ return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
case NEON::BI__builtin_neon_vstl1_lane_s64:
case NEON::BI__builtin_neon_vstl1q_lane_s64: {
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- llvm::StoreInst *SI = Builder.CreateAlignedStore(
- Ops[1], Builder.CreateBitCast(Ops[0], Ty), PtrOp0.getAlignment());
+ llvm::StoreInst *SI =
+ Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
SI->setAtomic(llvm::AtomicOrdering::Release);
return SI;
}
case NEON::BI__builtin_neon_vld2_v:
case NEON::BI__builtin_neon_vld2q_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
+ llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vld3_v:
case NEON::BI__builtin_neon_vld3q_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
+ llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vld4_v:
case NEON::BI__builtin_neon_vld4q_v: {
- llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
+ llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vld2_dup_v:
case NEON::BI__builtin_neon_vld2q_dup_v: {
- llvm::Type *PTy =
- llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
+ llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vld3_dup_v:
case NEON::BI__builtin_neon_vld3q_dup_v: {
- llvm::Type *PTy =
- llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
+ llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vld4_dup_v:
case NEON::BI__builtin_neon_vld4q_dup_v: {
- llvm::Type *PTy =
- llvm::PointerType::getUnqual(VTy->getElementType());
- Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
- llvm::Type *Tys[2] = { VTy, PTy };
+ llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
- Ops[0] = Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vld2_lane_v:
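
The same simplification repeats across all of these vld*/vst* cases: with opaque
pointers, Ops[0] already has type ptr, so the bitcasts to typed pointers were no-ops;
only the intrinsic signature still names a pointer type, now the context-level
UnqualPtrTy. A standalone sketch of the pattern, assuming an IRBuilder context:

    #include "llvm/IR/IRBuilder.h"
    // With opaque pointers every pointer value has type `ptr`, so the load
    // consumes the operand directly; no CreateBitCast to `VTy*` is needed.
    llvm::Value *loadVec(llvm::IRBuilder<> &B, llvm::FixedVectorType *VTy,
                         llvm::Value *Ptr, llvm::Align A) {
      return B.CreateAlignedLoad(VTy, Ptr, A);
    }
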
@@ -12377,8 +12901,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vld3_lane_v:
@@ -12391,8 +12913,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vld4_lane_v:
@@ -12406,8 +12926,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
- Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
case NEON::BI__builtin_neon_vst2_v:
@@ -12457,7 +12975,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vtrn_v:
case NEON::BI__builtin_neon_vtrnq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Value *SV = nullptr;
@@ -12476,7 +12993,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vuzp_v:
case NEON::BI__builtin_neon_vuzpq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Value *SV = nullptr;
@@ -12494,7 +13010,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vzip_v:
case NEON::BI__builtin_neon_vzipq_v: {
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
Value *SV = nullptr;
@@ -12713,9 +13228,7 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
Align Alignment) {
- // Cast the pointer to right type.
- Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(Ops[1]->getType()));
+ Value *Ptr = Ops[0];
Value *MaskVec = getMaskVecValue(
CGF, Ops[2],
@@ -12726,10 +13239,8 @@ static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
Align Alignment) {
- // Cast the pointer to right type.
llvm::Type *Ty = Ops[1]->getType();
- Value *Ptr =
- CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
+ Value *Ptr = Ops[0];
Value *MaskVec = getMaskVecValue(
CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
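
With the casts gone, both helpers reduce to building the <N x i1> mask vector and
issuing the masked memory intrinsic. A minimal sketch of the store side, assuming the
mask vector has already been produced by getMaskVecValue:

    #include "llvm/IR/IRBuilder.h"
    llvm::Value *maskedStore(llvm::IRBuilder<> &B, llvm::Value *Vec,
                             llvm::Value *Ptr, llvm::Value *MaskVec,
                             llvm::Align A) {
      // MaskVec is an <N x i1> derived from the integer mask operand.
      return B.CreateMaskedStore(Vec, Ptr, A, MaskVec);
    }
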
@@ -12740,11 +13251,7 @@ static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
ArrayRef<Value *> Ops) {
auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
- llvm::Type *PtrTy = ResultTy->getElementType();
-
- // Cast the pointer to element type.
- Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(PtrTy));
+ Value *Ptr = Ops[0];
Value *MaskVec = getMaskVecValue(
CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
@@ -12770,11 +13277,7 @@ static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
static Value *EmitX86CompressStore(CodeGenFunction &CGF,
ArrayRef<Value *> Ops) {
auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
- llvm::Type *PtrTy = ResultTy->getElementType();
-
- // Cast the pointer to element type.
- Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
- llvm::PointerType::getUnqual(PtrTy));
+ Value *Ptr = Ops[0];
Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
@@ -13316,13 +13819,10 @@ Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
}
-llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
- uint32_t Features1 = Lo_32(FeaturesMask);
- uint32_t Features2 = Hi_32(FeaturesMask);
-
+llvm::Value *
+CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
Value *Result = Builder.getTrue();
-
- if (Features1 != 0) {
+ if (FeatureMask[0] != 0) {
// Matching the struct layout from the compiler-rt/libgcc structure that is
// filled in:
// unsigned int __cpu_vendor;
@@ -13345,22 +13845,26 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
CharUnits::fromQuantity(4));
// Check the value of the bit corresponding to the feature requested.
- Value *Mask = Builder.getInt32(Features1);
+ Value *Mask = Builder.getInt32(FeatureMask[0]);
Value *Bitset = Builder.CreateAnd(Features, Mask);
Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
Result = Builder.CreateAnd(Result, Cmp);
}
- if (Features2 != 0) {
- llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
- "__cpu_features2");
- cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
-
- Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures2,
- CharUnits::fromQuantity(4));
-
+ llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
+ llvm::Constant *CpuFeatures2 =
+ CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
+ cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
+ for (int i = 1; i != 4; ++i) {
+ const uint32_t M = FeatureMask[i];
+ if (!M)
+ continue;
+ Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
+ Value *Features = Builder.CreateAlignedLoad(
+ Int32Ty, Builder.CreateGEP(ATy, CpuFeatures2, Idxs),
+ CharUnits::fromQuantity(4));
// Check the value of the bit corresponding to the feature requested.
- Value *Mask = Builder.getInt32(Features2);
+ Value *Mask = Builder.getInt32(M);
Value *Bitset = Builder.CreateAnd(Features, Mask);
Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
Result = Builder.CreateAnd(Result, Cmp);
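
The widened interface carries four 32-bit feature words: word 0 is still tested through
__cpu_model, while words 1-3 index the new three-element __cpu_features2 array.
Declarations mirroring the compiler-rt/libgcc layout being indexed here (the
definitions live in the runtime):

    extern "C" {
    struct __processor_model {
      unsigned int __cpu_vendor;
      unsigned int __cpu_type;
      unsigned int __cpu_subtype;
      unsigned int __cpu_features[1]; // feature word 0
    };
    extern struct __processor_model __cpu_model;
    extern unsigned int __cpu_features2[3]; // feature words 1-3
    }
    // A requested word W with mask M passes iff (W & M) == M.
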
@@ -13372,7 +13876,7 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
Value *CodeGenFunction::EmitAArch64CpuInit() {
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
llvm::FunctionCallee Func =
- CGM.CreateRuntimeFunction(FTy, "init_cpu_features_resolver");
+ CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
cast<llvm::GlobalValue>(Func.getCallee())
->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
@@ -13441,16 +13945,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
assert(Error == ASTContext::GE_None && "Should not codegen an error");
for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
- // If this is a normal argument, just emit it as a scalar.
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- continue;
- }
-
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
+ Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
// These exist so that the builtin that takes an immediate can be bounds
@@ -13585,13 +14080,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Address Tmp = CreateMemTemp(E->getArg(0)->getType());
Builder.CreateStore(Ops[0], Tmp);
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
- Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
+ Tmp.getPointer());
}
case X86::BI_mm_getcsr:
case X86::BI__builtin_ia32_stmxcsr: {
Address Tmp = CreateMemTemp(E->getType());
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
- Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
+ Tmp.getPointer());
return Builder.CreateLoad(Tmp, "stmxcsr");
}
case X86::BI__builtin_ia32_xsave:
@@ -14629,12 +15124,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
BuiltinID == X86::BI__builtin_ia32_movntss)
Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
- // Convert the type of the pointer to a pointer to the stored type.
- Value *BC = Builder.CreateBitCast(
- Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
-
// Unaligned nontemporal store of the scalar value.
- StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
+ StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
SI->setAlignment(llvm::Align(1));
return SI;
@@ -15443,6 +15934,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cmppd256:
IID = Intrinsic::x86_avx_cmp_pd_256;
break;
+ case X86::BI__builtin_ia32_cmpph128_mask:
+ IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
+ break;
+ case X86::BI__builtin_ia32_cmpph256_mask:
+ IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
+ break;
+ case X86::BI__builtin_ia32_cmpph512_mask:
+ IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
+ break;
case X86::BI__builtin_ia32_cmpps512_mask:
IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
break;
@@ -15696,8 +16196,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__readfsdword:
case X86::BI__readfsqword: {
llvm::Type *IntTy = ConvertType(E->getType());
- Value *Ptr =
- Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
+ Value *Ptr = Builder.CreateIntToPtr(
+ Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
LoadInst *Load = Builder.CreateAlignedLoad(
IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
Load->setVolatile(true);
@@ -15708,8 +16208,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__readgsdword:
case X86::BI__readgsqword: {
llvm::Type *IntTy = ConvertType(E->getType());
- Value *Ptr =
- Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
+ Value *Ptr = Builder.CreateIntToPtr(
+ Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
LoadInst *Load = Builder.CreateAlignedLoad(
IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
Load->setVolatile(true);
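
Address spaces 257 and 256 are x86's FS- and GS-relative address spaces; with opaque
pointers only the address space matters, so the pointer is built straight from the raw
offset. A standalone sketch:

    #include "llvm/IR/IRBuilder.h"
    llvm::Value *readSegment(llvm::IRBuilder<> &B, llvm::Type *IntTy,
                             llvm::Value *Offset,
                             unsigned AS /* 257 = FS, 256 = GS */) {
      llvm::Value *P = B.CreateIntToPtr(
          Offset, llvm::PointerType::get(B.getContext(), AS));
      return B.CreateAlignedLoad(IntTy, P, llvm::MaybeAlign(4),
                                 /*isVolatile=*/true);
    }
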
@@ -15723,8 +16223,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
for (int i = 0; i < 3; ++i) {
Value *Extract = Builder.CreateExtractValue(Call, i + 1);
Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
- Ptr = Builder.CreateBitCast(
- Ptr, llvm::PointerType::getUnqual(Extract->getType()));
Builder.CreateAlignedStore(Extract, Ptr, Align(1));
}
@@ -15739,8 +16237,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
for (int i = 0; i < 4; ++i) {
Value *Extract = Builder.CreateExtractValue(Call, i + 1);
Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
- Ptr = Builder.CreateBitCast(
- Ptr, llvm::PointerType::getUnqual(Extract->getType()));
Builder.CreateAlignedStore(Extract, Ptr, Align(1));
}
@@ -15941,11 +16437,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
SmallVector<Value *, 2> Ops;
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops.push_back(EmitScalarExpr(E->getArg(1)));
- if(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
- BuiltinID == PPC::BI__builtin_vsx_lxvll){
- Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
- }else {
- Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
+ if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
+ BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
Ops.pop_back();
}
@@ -16013,11 +16506,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops.push_back(EmitScalarExpr(E->getArg(1)));
Ops.push_back(EmitScalarExpr(E->getArg(2)));
- if(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
- BuiltinID == PPC::BI__builtin_vsx_stxvll ){
- Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
- }else {
- Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
+ if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
+ BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
Ops.pop_back();
}
@@ -16553,7 +17043,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
}
case PPC::BI__builtin_ppc_load2r: {
Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
- Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
}
@@ -16778,7 +17268,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
case PPC::BI__builtin_ppc_sthcx: {
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
- Value *Op0 = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
return Builder.CreateCall(F, {Op0, Op1});
}
@@ -16852,10 +17342,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
BuiltinID == PPC::BI__builtin_mma_stxvp) {
if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
BuiltinID == PPC::BI__builtin_mma_lxvp) {
- Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
} else {
- Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
}
Ops.pop_back();
@@ -17062,6 +17550,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
Value *Op1 = EmitScalarExpr(E->getArg(1));
return Builder.CreateFDiv(Op0, Op1, "swdiv");
}
+ case PPC::BI__builtin_ppc_set_fpscr_rn:
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
+ {EmitScalarExpr(E->getArg(0))});
+ case PPC::BI__builtin_ppc_mffs:
+ return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
}
}
@@ -17094,24 +17587,66 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
}
// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
+/// Emit code based on Code Object ABI version.
+/// COV_4 : Emit code to use dispatch ptr
+/// COV_5 : Emit code to use implicitarg ptr
+/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
+/// and use its value for the COV_4 or COV_5 approach. It is used for
+/// compiling device libraries in an ABI-agnostic way.
+///
+/// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
+/// clang during compilation of user code.
Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
- bool IsCOV_5 = CGF.getTarget().getTargetOpts().CodeObjectVersion ==
- clang::TargetOptions::COV_5;
- Constant *Offset;
- Value *DP;
- if (IsCOV_5) {
+ llvm::LoadInst *LD;
+
+ auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
+
+ if (Cov == CodeObjectVersionKind::COV_None) {
+ StringRef Name = "__oclc_ABI_version";
+ auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
+ if (!ABIVersionC)
+ ABIVersionC = new llvm::GlobalVariable(
+ CGF.CGM.getModule(), CGF.Int32Ty, false,
+ llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
+ llvm::GlobalVariable::NotThreadLocal,
+ CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
+
+ // This load will be eliminated by the IPSCCP because it is constant
+    // This load will be eliminated by IPSCCP because it is constant
+ // adding externally_initialized will keep the load.
+ Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
+ CGF.CGM.getIntAlign());
+
+ Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
+ ABIVersion,
+ llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
+
// Indexing the implicit kernarg segment.
- Offset = llvm::ConstantInt::get(CGF.Int32Ty, 12 + Index * 2);
- DP = EmitAMDGPUImplicitArgPtr(CGF);
- } else {
+ Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
+ CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
+
// Indexing the HSA kernel_dispatch_packet struct.
- Offset = llvm::ConstantInt::get(CGF.Int32Ty, 4 + Index * 2);
- DP = EmitAMDGPUDispatchPtr(CGF);
+ Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
+ CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
+
+ auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
+ LD = CGF.Builder.CreateLoad(
+ Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
+ } else {
+ Value *GEP = nullptr;
+ if (Cov == CodeObjectVersionKind::COV_5) {
+ // Indexing the implicit kernarg segment.
+ GEP = CGF.Builder.CreateConstGEP1_32(
+ CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
+ } else {
+ // Indexing the HSA kernel_dispatch_packet struct.
+ GEP = CGF.Builder.CreateConstGEP1_32(
+ CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
+ }
+ LD = CGF.Builder.CreateLoad(
+ Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
}
- auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
- auto *LD = CGF.Builder.CreateLoad(
- Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
llvm::MDBuilder MDHelper(CGF.getLLVMContext());
llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
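
A rough source-level sketch of the ABI-agnostic COV_NONE pattern, with
implicitarg_ptr()/dispatch_ptr() as hypothetical stand-ins for the
llvm.amdgcn.implicitarg.ptr and llvm.amdgcn.dispatch.ptr intrinsics
(CodeObjectVersionKind::COV_5 has the value 500):

    const char *implicitarg_ptr(); // hypothetical intrinsic stand-ins
    const char *dispatch_ptr();
    extern const unsigned __oclc_ABI_version;
    unsigned short workgroup_size(unsigned Index) {
      const char *Base;
      unsigned Off;
      if (__oclc_ABI_version >= 500) { // COV_5: implicit kernarg segment
        Base = implicitarg_ptr();
        Off = 12 + Index * 2;
      } else {                         // COV_4: hsa_kernel_dispatch_packet
        Base = dispatch_ptr();
        Off = 4 + Index * 2;
      }
      unsigned short Size;
      __builtin_memcpy(&Size, Base + Off, sizeof(Size));
      return Size;
    }
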
@@ -17175,6 +17710,23 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
}
+llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
+ unsigned Idx,
+ const CallExpr *E) {
+ llvm::Value *Arg = nullptr;
+ if ((ICEArguments & (1 << Idx)) == 0) {
+ Arg = EmitScalarExpr(E->getArg(Idx));
+ } else {
+ // If this is required to be a constant, constant fold it so that we
+ // know that the generated intrinsic gets a ConstantInt.
+ std::optional<llvm::APSInt> Result =
+ E->getArg(Idx)->getIntegerConstantExpr(getContext());
+ assert(Result && "Expected argument to be a constant");
+ Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
+ }
+ return Arg;
+}
+
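
Each per-target emitter now reduces its argument loop to the same call pattern; a
sketch, assuming the usual CodeGenFunction locals (BuiltinID, E, Ops) are in scope:

    unsigned ICEArguments = 0;
    ASTContext::GetBuiltinTypeError Error;
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
    assert(Error == ASTContext::GE_None && "Should not codegen an error");
    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i)
      Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
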
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
@@ -17225,8 +17777,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_mov_dpp:
case AMDGPU::BI__builtin_amdgcn_update_dpp: {
llvm::SmallVector<llvm::Value *, 6> Args;
- for (unsigned I = 0; I != E->getNumArgs(); ++I)
- Args.push_back(EmitScalarExpr(E->getArg(I)));
+ // Find out if any arguments are required to be integer constant
+ // expressions.
+ unsigned ICEArguments = 0;
+ ASTContext::GetBuiltinTypeError Error;
+ getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+ assert(Error == ASTContext::GE_None && "Should not codegen an error");
+ for (unsigned I = 0; I != E->getNumArgs(); ++I) {
+ Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
+ }
assert(Args.size() == 5 || Args.size() == 6);
if (Args.size() == 5)
Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
@@ -17271,14 +17830,22 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_log_clampf:
return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
case AMDGPU::BI__builtin_amdgcn_ldexp:
- case AMDGPU::BI__builtin_amdgcn_ldexpf:
- case AMDGPU::BI__builtin_amdgcn_ldexph: {
+ case AMDGPU::BI__builtin_amdgcn_ldexpf: {
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
llvm::Function *F =
CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
return Builder.CreateCall(F, {Src0, Src1});
}
+ case AMDGPU::BI__builtin_amdgcn_ldexph: {
+ // The raw instruction has a different behavior for out of bounds exponent
+    // The raw instruction has different behavior for out-of-bounds exponent
+    // values (implicit truncation instead of saturating to short_min/short_max).
+ llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
+ llvm::Function *F =
+ CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
+ return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
+ }
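
The generic llvm.ldexp intrinsic is overloaded on both the value and the exponent
type, so the f16 form takes an i16 exponent and the builtin's i32 argument is
truncated, matching the instruction's implicit truncation. A standalone sketch:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    llvm::Value *ldexpHalf(llvm::IRBuilder<> &B, llvm::Module &M,
                           llvm::Value *X /*half*/, llvm::Value *Exp /*i32*/) {
      llvm::Type *I16 = B.getInt16Ty();
      llvm::Function *F = llvm::Intrinsic::getDeclaration(
          &M, llvm::Intrinsic::ldexp, {X->getType(), I16});
      return B.CreateCall(F, {X, B.CreateTrunc(Exp, I16)});
    }
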
case AMDGPU::BI__builtin_amdgcn_frexp_mant:
case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
case AMDGPU::BI__builtin_amdgcn_frexp_manth:
@@ -17479,21 +18046,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
}
- case AMDGPU::BI__builtin_amdgcn_read_exec: {
- CallInst *CI = cast<CallInst>(
- EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec"));
- CI->setConvergent();
- return CI;
- }
+ case AMDGPU::BI__builtin_amdgcn_read_exec:
+ return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
- case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
- StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
- "exec_lo" : "exec_hi";
- CallInst *CI = cast<CallInst>(
- EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, NormalRead, RegName));
- CI->setConvergent();
- return CI;
- }
+ return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
+ case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
+ return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
@@ -17536,9 +18094,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
}
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
@@ -17576,6 +18138,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
ArgForMatchingRetType = 2;
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
break;
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
+ ArgForMatchingRetType = 2;
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
+ ArgForMatchingRetType = 2;
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
+ break;
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
ArgForMatchingRetType = 4;
@@ -17660,7 +18232,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
break;
}
- Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Address Ptr = CheckAtomicAlignment(*this, E);
Value *Val = EmitScalarExpr(E->getArg(1));
ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
@@ -17778,6 +18350,32 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {X, Undef});
}
+ case SystemZ::BI__builtin_s390_verllb:
+ case SystemZ::BI__builtin_s390_verllh:
+ case SystemZ::BI__builtin_s390_verllf:
+ case SystemZ::BI__builtin_s390_verllg: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
+ // Splat scalar rotate amount to vector type.
+ unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
+ Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
+ Amt = Builder.CreateVectorSplat(NumElts, Amt);
+ Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
+ return Builder.CreateCall(F, { Src, Src, Amt });
+ }
+
+ case SystemZ::BI__builtin_s390_verllvb:
+ case SystemZ::BI__builtin_s390_verllvh:
+ case SystemZ::BI__builtin_s390_verllvf:
+ case SystemZ::BI__builtin_s390_verllvg: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
+ Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
+ return Builder.CreateCall(F, { Src, Src, Amt });
+ }
+
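
Both families lower to the generic funnel-shift intrinsic: fshl(x, x, amt) is a rotate
left, and for the scalar-amount (verll*) forms the amount is splatted so every element
rotates by the same count. A standalone sketch of the core:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    llvm::Value *rotl(llvm::IRBuilder<> &B, llvm::Module &M, llvm::Value *X,
                      llvm::Value *Amt /* same vector type as X */) {
      llvm::Function *F = llvm::Intrinsic::getDeclaration(
          &M, llvm::Intrinsic::fshl, {X->getType()});
      return B.CreateCall(F, {X, X, Amt});
    }
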
case SystemZ::BI__builtin_s390_vfsqsb:
case SystemZ::BI__builtin_s390_vfsqdb: {
llvm::Type *ResultType = ConvertType(E->getType());
@@ -18523,9 +19121,10 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
case NVPTX::BI__nvvm_atom_add_gen_f:
case NVPTX::BI__nvvm_atom_add_gen_d: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
Value *Val = EmitScalarExpr(E->getArg(1));
- return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val,
+
+ return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
AtomicOrdering::SequentiallyConsistent);
}
@@ -19256,44 +19855,36 @@ RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
-/// TODO: actually use ptrmask once most optimization passes know about it.
RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
BuiltinAlignArgs Args(E, *this);
- llvm::Value *SrcAddr = Args.Src;
- if (Args.Src->getType()->isPointerTy())
- SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
- llvm::Value *SrcForMask = SrcAddr;
+ llvm::Value *SrcForMask = Args.Src;
if (AlignUp) {
// When aligning up we have to first add the mask to ensure we go over the
// next alignment value and then align down to the next valid multiple.
// By adding the mask, we ensure that align_up on an already aligned
// value will not change the value.
- SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
+ if (Args.Src->getType()->isPointerTy()) {
+ if (getLangOpts().isSignedOverflowDefined())
+ SrcForMask =
+ Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
+ else
+ SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
+ /*SignedIndices=*/true,
+ /*isSubtraction=*/false,
+ E->getExprLoc(), "over_boundary");
+ } else {
+ SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
+ }
}
// Invert the mask to only clear the lower bits.
llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
- llvm::Value *Result =
- Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
+ llvm::Value *Result = nullptr;
if (Args.Src->getType()->isPointerTy()) {
- /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well.
- // Result = Builder.CreateIntrinsic(
- // Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType},
- // {SrcForMask, NegatedMask}, nullptr, "aligned_result");
- Result->setName("aligned_intptr");
- llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
- // The result must point to the same underlying allocation. This means we
- // can use an inbounds GEP to enable better optimization.
- if (getLangOpts().isSignedOverflowDefined())
- Result =
- Builder.CreateGEP(Int8Ty, Args.Src, Difference, "aligned_result");
- else
- Result = EmitCheckedInBoundsGEP(Int8Ty, Args.Src, Difference,
- /*SignedIndices=*/true,
- /*isSubtraction=*/!AlignUp,
- E->getExprLoc(), "aligned_result");
- // Emit an alignment assumption to ensure that the new alignment is
- // propagated to loads/stores, etc.
- emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
+ Result = Builder.CreateIntrinsic(
+ Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
+ {SrcForMask, InvertedMask}, nullptr, "aligned_result");
+ } else {
+ Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
}
assert(Result->getType() == Args.SrcType);
return RValue::get(Result);
@@ -19997,8 +20588,7 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
// The base pointer is passed by address, so it needs to be loaded.
Address A = EmitPointerWithAlignment(E->getArg(0));
- Address BP = Address(Builder.CreateBitCast(
- A.getPointer(), Int8PtrPtrTy), Int8PtrTy, A.getAlignment());
+ Address BP = Address(A.getPointer(), Int8PtrTy, A.getAlignment());
llvm::Value *Base = Builder.CreateLoad(BP);
// The treatment of both loads and stores is the same: the arguments for
// the builtin are the same as the arguments for the intrinsic.
@@ -20033,15 +20623,13 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
// The intrinsic generates one result, which is the new value for the base
// pointer. It needs to be returned. The result of the load instruction is
// passed to intrinsic by address, so the value needs to be stored.
- llvm::Value *BaseAddress =
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
+ llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
// Expressions like &(*pt++) will be incremented per evaluation.
// EmitPointerWithAlignment and EmitScalarExpr evaluates the expression
// per call.
Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
- DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
- Int8Ty, DestAddr.getAlignment());
+ DestAddr = Address(DestAddr.getPointer(), Int8Ty, DestAddr.getAlignment());
llvm::Value *DestAddress = DestAddr.getPointer();
// Operands are Base, Dest, Modifier.
@@ -20214,17 +20802,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
Ops.push_back(AggValue);
continue;
}
-
- // If this is a normal argument, just emit it as a scalar.
- if ((ICEArguments & (1 << i)) == 0) {
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
- continue;
- }
-
- // If this is required to be a constant, constant fold it so that we know
- // that the generated intrinsic gets a ConstantInt.
- Ops.push_back(llvm::ConstantInt::get(
- getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext())));
+ Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
Intrinsic::ID ID = Intrinsic::not_intrinsic;
@@ -20362,11 +20940,13 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
// Zihintntl
case RISCV::BI__builtin_riscv_ntl_load: {
llvm::Type *ResTy = ConvertType(E->getType());
- ConstantInt *Mode = cast<ConstantInt>(Ops[1]);
+ unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
+ if (Ops.size() == 2)
+ DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
getLLVMContext(),
- llvm::ConstantAsMetadata::get(Builder.getInt32(Mode->getZExtValue())));
+ llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
llvm::MDNode *NontemporalNode = llvm::MDNode::get(
getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
@@ -20388,18 +20968,17 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
return Load;
}
case RISCV::BI__builtin_riscv_ntl_store: {
- ConstantInt *Mode = cast<ConstantInt>(Ops[2]);
+ unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
+ if (Ops.size() == 3)
+ DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
getLLVMContext(),
- llvm::ConstantAsMetadata::get(Builder.getInt32(Mode->getZExtValue())));
+ llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
llvm::MDNode *NontemporalNode = llvm::MDNode::get(
getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
- Value *BC = Builder.CreateBitCast(
- Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType()), "cast");
-
- StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], BC);
+ StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
RISCVDomainNode);
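
The domain operand is now optional, defaulting to __RISCV_NTLH_ALL (5), and the hint
is carried purely as instruction metadata. A minimal sketch of the tagging, assuming
the store has already been emitted:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Module.h"
    void tagNontemporal(llvm::StoreInst *S, llvm::Module &M, unsigned Domain) {
      llvm::LLVMContext &C = M.getContext();
      llvm::Type *I32 = llvm::Type::getInt32Ty(C);
      auto *DomainMD = llvm::MDNode::get(
          C, llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(I32, Domain)));
      auto *NTMD = llvm::MDNode::get(
          C, llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(I32, 1)));
      S->setMetadata(llvm::LLVMContext::MD_nontemporal, NTMD);
      S->setMetadata(M.getMDKindID("riscv-nontemporal-domain"), DomainMD);
    }
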
@@ -20418,129 +20997,3 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
return Builder.CreateCall(F, Ops, "");
}
-
-Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID,
- const CallExpr *E) {
- SmallVector<Value *, 4> Ops;
-
- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
- Ops.push_back(EmitScalarExpr(E->getArg(i)));
-
- Intrinsic::ID ID = Intrinsic::not_intrinsic;
-
- switch (BuiltinID) {
- default:
- llvm_unreachable("unexpected builtin ID.");
- case LoongArch::BI__builtin_loongarch_cacop_d:
- ID = Intrinsic::loongarch_cacop_d;
- break;
- case LoongArch::BI__builtin_loongarch_cacop_w:
- ID = Intrinsic::loongarch_cacop_w;
- break;
- case LoongArch::BI__builtin_loongarch_dbar:
- ID = Intrinsic::loongarch_dbar;
- break;
- case LoongArch::BI__builtin_loongarch_break:
- ID = Intrinsic::loongarch_break;
- break;
- case LoongArch::BI__builtin_loongarch_ibar:
- ID = Intrinsic::loongarch_ibar;
- break;
- case LoongArch::BI__builtin_loongarch_movfcsr2gr:
- ID = Intrinsic::loongarch_movfcsr2gr;
- break;
- case LoongArch::BI__builtin_loongarch_movgr2fcsr:
- ID = Intrinsic::loongarch_movgr2fcsr;
- break;
- case LoongArch::BI__builtin_loongarch_syscall:
- ID = Intrinsic::loongarch_syscall;
- break;
- case LoongArch::BI__builtin_loongarch_crc_w_b_w:
- ID = Intrinsic::loongarch_crc_w_b_w;
- break;
- case LoongArch::BI__builtin_loongarch_crc_w_h_w:
- ID = Intrinsic::loongarch_crc_w_h_w;
- break;
- case LoongArch::BI__builtin_loongarch_crc_w_w_w:
- ID = Intrinsic::loongarch_crc_w_w_w;
- break;
- case LoongArch::BI__builtin_loongarch_crc_w_d_w:
- ID = Intrinsic::loongarch_crc_w_d_w;
- break;
- case LoongArch::BI__builtin_loongarch_crcc_w_b_w:
- ID = Intrinsic::loongarch_crcc_w_b_w;
- break;
- case LoongArch::BI__builtin_loongarch_crcc_w_h_w:
- ID = Intrinsic::loongarch_crcc_w_h_w;
- break;
- case LoongArch::BI__builtin_loongarch_crcc_w_w_w:
- ID = Intrinsic::loongarch_crcc_w_w_w;
- break;
- case LoongArch::BI__builtin_loongarch_crcc_w_d_w:
- ID = Intrinsic::loongarch_crcc_w_d_w;
- break;
- case LoongArch::BI__builtin_loongarch_csrrd_w:
- ID = Intrinsic::loongarch_csrrd_w;
- break;
- case LoongArch::BI__builtin_loongarch_csrwr_w:
- ID = Intrinsic::loongarch_csrwr_w;
- break;
- case LoongArch::BI__builtin_loongarch_csrxchg_w:
- ID = Intrinsic::loongarch_csrxchg_w;
- break;
- case LoongArch::BI__builtin_loongarch_csrrd_d:
- ID = Intrinsic::loongarch_csrrd_d;
- break;
- case LoongArch::BI__builtin_loongarch_csrwr_d:
- ID = Intrinsic::loongarch_csrwr_d;
- break;
- case LoongArch::BI__builtin_loongarch_csrxchg_d:
- ID = Intrinsic::loongarch_csrxchg_d;
- break;
- case LoongArch::BI__builtin_loongarch_iocsrrd_b:
- ID = Intrinsic::loongarch_iocsrrd_b;
- break;
- case LoongArch::BI__builtin_loongarch_iocsrrd_h:
- ID = Intrinsic::loongarch_iocsrrd_h;
- break;
- case LoongArch::BI__builtin_loongarch_iocsrrd_w:
- ID = Intrinsic::loongarch_iocsrrd_w;
- break;
- case LoongArch::BI__builtin_loongarch_iocsrrd_d:
- ID = Intrinsic::loongarch_iocsrrd_d;
- break;
- case LoongArch::BI__builtin_loongarch_iocsrwr_b:
- ID = Intrinsic::loongarch_iocsrwr_b;
- break;
- case LoongArch::BI__builtin_loongarch_iocsrwr_h:
- ID = Intrinsic::loongarch_iocsrwr_h;
- break;
- case LoongArch::BI__builtin_loongarch_iocsrwr_w:
- ID = Intrinsic::loongarch_iocsrwr_w;
- break;
- case LoongArch::BI__builtin_loongarch_iocsrwr_d:
- ID = Intrinsic::loongarch_iocsrwr_d;
- break;
- case LoongArch::BI__builtin_loongarch_cpucfg:
- ID = Intrinsic::loongarch_cpucfg;
- break;
- case LoongArch::BI__builtin_loongarch_asrtle_d:
- ID = Intrinsic::loongarch_asrtle_d;
- break;
- case LoongArch::BI__builtin_loongarch_asrtgt_d:
- ID = Intrinsic::loongarch_asrtgt_d;
- break;
- case LoongArch::BI__builtin_loongarch_lddir_d:
- ID = Intrinsic::loongarch_lddir_d;
- break;
- case LoongArch::BI__builtin_loongarch_ldpte_d:
- ID = Intrinsic::loongarch_ldpte_d;
- break;
- // TODO: Support more Intrinsics.
- }
-
- assert(ID != Intrinsic::not_intrinsic);
-
- llvm::Function *F = CGM.getIntrinsic(ID);
- return Builder.CreateCall(F, Ops);
-}
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 08769c98dc29..520b0c4f1176 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -19,6 +19,7 @@
#include "clang/Basic/Cuda.h"
#include "clang/CodeGen/CodeGenABITypes.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
+#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -226,18 +227,15 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
TheModule(CGM.getModule()),
RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
DeviceMC(InitDeviceMC(CGM)) {
- CodeGen::CodeGenTypes &Types = CGM.getTypes();
- ASTContext &Ctx = CGM.getContext();
-
IntTy = CGM.IntTy;
SizeTy = CGM.SizeTy;
VoidTy = CGM.VoidTy;
Zeros[0] = llvm::ConstantInt::get(SizeTy, 0);
Zeros[1] = Zeros[0];
- CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
- VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
- VoidPtrPtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext());
+ CharPtrTy = CGM.UnqualPtrTy;
+ VoidPtrTy = CGM.UnqualPtrTy;
+ VoidPtrPtrTy = CGM.UnqualPtrTy;
}
llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
@@ -558,7 +556,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
llvm::Value *Args[] = {
&GpuBinaryHandlePtr,
- Builder.CreateBitCast(KernelHandles[I.Kernel->getName()], VoidPtrTy),
+ KernelHandles[I.Kernel->getName()],
KernelName,
KernelName,
llvm::ConstantInt::get(IntTy, -1),
@@ -633,8 +631,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
replaceManagedVar(Var, ManagedVar);
llvm::Value *Args[] = {
&GpuBinaryHandlePtr,
- Builder.CreateBitCast(ManagedVar, VoidPtrTy),
- Builder.CreateBitCast(Var, VoidPtrTy),
+ ManagedVar,
+ Var,
VarName,
llvm::ConstantInt::get(VarSizeTy, VarSize),
llvm::ConstantInt::get(IntTy, Var->getAlignment())};
@@ -643,7 +641,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
} else {
llvm::Value *Args[] = {
&GpuBinaryHandlePtr,
- Builder.CreateBitCast(Var, VoidPtrTy),
+ Var,
VarName,
VarName,
llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
@@ -657,15 +655,15 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
case DeviceVarFlags::Surface:
Builder.CreateCall(
RegisterSurf,
- {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
- VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
+ {&GpuBinaryHandlePtr, Var, VarName, VarName,
+ llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
break;
case DeviceVarFlags::Texture:
Builder.CreateCall(
RegisterTex,
- {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
- VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
+ {&GpuBinaryHandlePtr, Var, VarName, VarName,
+ llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
llvm::ConstantInt::get(IntTy, Info.Flags.isNormalized()),
llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
break;
@@ -862,9 +860,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
{
CtorBuilder.SetInsertPoint(IfBlock);
// GpuBinaryHandle = __hipRegisterFatBinary(&FatbinWrapper);
- llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
- RegisterFatbinFunc,
- CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
+ llvm::CallInst *RegisterFatbinCall =
+ CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr);
CtorBuilder.CreateBr(ExitBlock);
}
@@ -880,9 +877,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// Register binary with CUDA runtime. This is substantially different in
// default mode vs. separate compilation!
// GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
- llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
- RegisterFatbinFunc,
- CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
+ llvm::CallInst *RegisterFatbinCall =
+ CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
GpuBinaryHandle = new llvm::GlobalVariable(
TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
@@ -923,9 +919,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);
assert(RegisterGlobalsFunc && "Expecting at least dummy function!");
- llvm::Value *Args[] = {RegisterGlobalsFunc,
- CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy),
- ModuleIDConstant,
+ llvm::Value *Args[] = {RegisterGlobalsFunc, FatbinWrapper, ModuleIDConstant,
makeDummyFunction(getCallbackFnTy())};
CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
}
@@ -1132,33 +1126,45 @@ void CGNVCUDARuntime::transformManagedVars() {
// registered. The linker will provide a pointer to this section so we can
// register the symbols with the linked device image.
void CGNVCUDARuntime::createOffloadingEntries() {
- llvm::OpenMPIRBuilder OMPBuilder(CGM.getModule());
- OMPBuilder.initialize();
-
StringRef Section = CGM.getLangOpts().HIP ? "hip_offloading_entries"
: "cuda_offloading_entries";
+ llvm::Module &M = CGM.getModule();
for (KernelInfo &I : EmittedKernels)
- OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel->getName()],
- getDeviceSideName(cast<NamedDecl>(I.D)), 0,
- DeviceVarFlags::OffloadGlobalEntry, Section);
+ llvm::offloading::emitOffloadingEntry(
+ M, KernelHandles[I.Kernel->getName()],
+ getDeviceSideName(cast<NamedDecl>(I.D)), /*Flags=*/0, /*Data=*/0,
+ llvm::offloading::OffloadGlobalEntry, Section);
for (VarInfo &I : DeviceVars) {
uint64_t VarSize =
CGM.getDataLayout().getTypeAllocSize(I.Var->getValueType());
+ int32_t Flags =
+ (I.Flags.isExtern()
+ ? static_cast<int32_t>(llvm::offloading::OffloadGlobalExtern)
+ : 0) |
+ (I.Flags.isConstant()
+ ? static_cast<int32_t>(llvm::offloading::OffloadGlobalConstant)
+ : 0) |
+ (I.Flags.isNormalized()
+ ? static_cast<int32_t>(llvm::offloading::OffloadGlobalNormalized)
+ : 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
- OMPBuilder.emitOffloadingEntry(
- I.Var, getDeviceSideName(I.D), VarSize,
- I.Flags.isManaged() ? DeviceVarFlags::OffloadGlobalManagedEntry
- : DeviceVarFlags::OffloadGlobalEntry,
- Section);
+ llvm::offloading::emitOffloadingEntry(
+ M, I.Var, getDeviceSideName(I.D), VarSize,
+ (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
+ : llvm::offloading::OffloadGlobalEntry) |
+ Flags,
+ /*Data=*/0, Section);
} else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
- OMPBuilder.emitOffloadingEntry(I.Var, getDeviceSideName(I.D), VarSize,
- DeviceVarFlags::OffloadGlobalSurfaceEntry,
- Section);
+ llvm::offloading::emitOffloadingEntry(
+ M, I.Var, getDeviceSideName(I.D), VarSize,
+ llvm::offloading::OffloadGlobalSurfaceEntry | Flags,
+ I.Flags.getSurfTexType(), Section);
} else if (I.Flags.getKind() == DeviceVarFlags::Texture) {
- OMPBuilder.emitOffloadingEntry(I.Var, getDeviceSideName(I.D), VarSize,
- DeviceVarFlags::OffloadGlobalTextureEntry,
- Section);
+ llvm::offloading::emitOffloadingEntry(
+ M, I.Var, getDeviceSideName(I.D), VarSize,
+ llvm::offloading::OffloadGlobalTextureEntry | Flags,
+ I.Flags.getSurfTexType(), Section);
}
}
}
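
Entry emission now goes through llvm::offloading::emitOffloadingEntry instead of
instantiating an OpenMPIRBuilder; the extern/constant/normalized bits are folded into
the flags word and the surf/tex type rides in the data word. For orientation, the
record such an entry materializes in the *_offloading_entries section (layout as
consumed by the offloading runtimes):

    #include <cstddef>
    #include <cstdint>
    struct __tgt_offload_entry {
      void *addr;     // kernel handle or variable address
      char *name;     // device-side symbol name
      size_t size;    // 0 for kernels, the variable size otherwise
      int32_t flags;  // entry kind | extern/constant/normalized bits
      int32_t data;   // surf/tex type for surfaces and textures, else 0
    };
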
@@ -1234,7 +1240,10 @@ llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
Var->setAlignment(CGM.getPointerAlign().getAsAlign());
Var->setDSOLocal(F->isDSOLocal());
Var->setVisibility(F->getVisibility());
- CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var);
+ auto *FD = cast<FunctionDecl>(GD.getDecl());
+ auto *FT = FD->getPrimaryTemplate();
+ if (!FT || FT->isThisDeclarationADefinition())
+ CGM.maybeSetTrivialComdat(*FD, *Var);
KernelHandles[F->getName()] = Var;
KernelStubs[Var] = F;
return Var;
diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h
index 9a9c6d26cc63..c7af8f1cf0fe 100644
--- a/clang/lib/CodeGen/CGCUDARuntime.h
+++ b/clang/lib/CodeGen/CGCUDARuntime.h
@@ -17,6 +17,7 @@
#include "clang/AST/GlobalDecl.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/IR/GlobalValue.h"
namespace llvm {
@@ -52,19 +53,6 @@ public:
Texture, // Builtin texture
};
- /// The kind flag for an offloading entry.
- enum OffloadEntryKindFlag : uint32_t {
- /// Mark the entry as a global entry. This indicates the presense of a
- /// kernel if the size field is zero and a variable otherwise.
- OffloadGlobalEntry = 0x0,
- /// Mark the entry as a managed global variable.
- OffloadGlobalManagedEntry = 0x1,
- /// Mark the entry as a surface variable.
- OffloadGlobalSurfaceEntry = 0x2,
- /// Mark the entry as a texture variable.
- OffloadGlobalTextureEntry = 0x3,
- };
-
private:
unsigned Kind : 2;
unsigned Extern : 1;
diff --git a/clang/lib/CodeGen/CGCXXABI.cpp b/clang/lib/CodeGen/CGCXXABI.cpp
index 7b77dd7875bc..a8bf57a277e9 100644
--- a/clang/lib/CodeGen/CGCXXABI.cpp
+++ b/clang/lib/CodeGen/CGCXXABI.cpp
@@ -120,10 +120,10 @@ void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList &params) {
// FIXME: I'm not entirely sure I like using a fake decl just for code
// generation. Maybe we can come up with a better way?
- auto *ThisDecl = ImplicitParamDecl::Create(
- CGM.getContext(), nullptr, MD->getLocation(),
- &CGM.getContext().Idents.get("this"), MD->getThisType(),
- ImplicitParamDecl::CXXThis);
+ auto *ThisDecl =
+ ImplicitParamDecl::Create(CGM.getContext(), nullptr, MD->getLocation(),
+ &CGM.getContext().Idents.get("this"),
+ MD->getThisType(), ImplicitParamKind::CXXThis);
params.push_back(ThisDecl);
CGF.CXXABIThisDecl = ThisDecl;
@@ -312,8 +312,7 @@ void CGCXXABI::setCXXDestructorDLLStorage(llvm::GlobalValue *GV,
llvm::GlobalValue::LinkageTypes CGCXXABI::getCXXDestructorLinkage(
GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const {
// Delegate back to CGM by default.
- return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage,
- /*IsConstantVariable=*/false);
+ return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage);
}
bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) {
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index bd272e016e92..a24aeea7ae32 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -13,6 +13,7 @@
#include "CGCall.h"
#include "ABIInfo.h"
+#include "ABIInfoImpl.h"
#include "CGBlocks.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
@@ -71,6 +72,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
case CC_PreserveAll: return llvm::CallingConv::PreserveAll;
case CC_Swift: return llvm::CallingConv::Swift;
case CC_SwiftAsync: return llvm::CallingConv::SwiftTail;
+ case CC_M68kRTD: return llvm::CallingConv::M68k_RTD;
}
}
@@ -112,8 +114,7 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) {
// When translating an unprototyped function type, always use a
// variadic type.
return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(),
- /*instanceMethod=*/false,
- /*chainCall=*/false, std::nullopt,
+ FnInfoOpts::None, std::nullopt,
FTNP->getExtInfo(), {}, RequiredArgs(0));
}
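
The (instanceMethod, chainCall) bool pair is replaced throughout by an option bitmask,
which makes call sites self-describing. Presumed shape of the enum (the definition
lives in the CGCall.h changes, outside this hunk):

    enum class FnInfoOpts {
      None = 0,
      IsInstanceMethod = 1 << 0,
      IsChainCall = 1 << 1,
    };
    inline FnInfoOpts operator|(FnInfoOpts A, FnInfoOpts B) {
      return static_cast<FnInfoOpts>(static_cast<int>(A) | static_cast<int>(B));
    }
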
@@ -189,10 +190,10 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod,
appendParameterTypes(CGT, prefix, paramInfos, FTP);
CanQualType resultType = FTP->getReturnType().getUnqualifiedType();
- return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod,
- /*chainCall=*/false, prefix,
- FTP->getExtInfo(), paramInfos,
- Required);
+ FnInfoOpts opts =
+ instanceMethod ? FnInfoOpts::IsInstanceMethod : FnInfoOpts::None;
+ return CGT.arrangeLLVMFunctionInfo(resultType, opts, prefix,
+ FTP->getExtInfo(), paramInfos, Required);
}
/// Arrange the argument and result information for a value of the
@@ -252,6 +253,9 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
if (D->hasAttr<PreserveAllAttr>())
return CC_PreserveAll;
+ if (D->hasAttr<M68kRTDAttr>())
+ return CC_M68kRTD;
+
return CC_C;
}
@@ -271,7 +275,7 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
argTypes.push_back(DeriveThisType(RD, MD));
return ::arrangeLLVMFunctionInfo(
- *this, true, argTypes,
+ *this, /*instanceMethod=*/true, argTypes,
FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>());
}
@@ -298,7 +302,7 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) {
setCUDAKernelCallingConvention(FT, CGM, MD);
auto prototype = FT.getAs<FunctionProtoType>();
- if (MD->isInstance()) {
+ if (MD->isImplicitObjectMemberFunction()) {
// The abstract case is perfectly fine.
const CXXRecordDecl *ThisType = TheCXXABI.getThisArgumentTypeForMethod(MD);
return arrangeCXXMethodType(ThisType, prototype.getTypePtr(), MD);
@@ -363,9 +367,8 @@ CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) {
: TheCXXABI.hasMostDerivedReturn(GD)
? CGM.getContext().VoidPtrTy
: Context.VoidTy;
- return arrangeLLVMFunctionInfo(resultType, /*instanceMethod=*/true,
- /*chainCall=*/false, argTypes, extInfo,
- paramInfos, required);
+ return arrangeLLVMFunctionInfo(resultType, FnInfoOpts::IsInstanceMethod,
+ argTypes, extInfo, paramInfos, required);
}
static SmallVector<CanQualType, 16>
@@ -439,9 +442,9 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
addExtParameterInfosForCall(ParamInfos, FPT.getTypePtr(), TotalPrefixArgs,
ArgTypes.size());
}
- return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true,
- /*chainCall=*/false, ArgTypes, Info,
- ParamInfos, Required);
+
+ return arrangeLLVMFunctionInfo(ResultType, FnInfoOpts::IsInstanceMethod,
+ ArgTypes, Info, ParamInfos, Required);
}
/// Arrange the argument and result information for the declaration or
@@ -449,7 +452,7 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args,
const CGFunctionInfo &
CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD))
- if (MD->isInstance())
+ if (MD->isImplicitObjectMemberFunction())
return arrangeCXXMethodDeclaration(MD);
CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
@@ -460,10 +463,9 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) {
// When declaring a function without a prototype, always use a
// non-variadic type.
if (CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>()) {
- return arrangeLLVMFunctionInfo(
- noProto->getReturnType(), /*instanceMethod=*/false,
- /*chainCall=*/false, std::nullopt, noProto->getExtInfo(), {},
- RequiredArgs::All);
+ return arrangeLLVMFunctionInfo(noProto->getReturnType(), FnInfoOpts::None,
+ std::nullopt, noProto->getExtInfo(), {},
+ RequiredArgs::All);
}
return arrangeFreeFunctionType(FTy.castAs<FunctionProtoType>());
@@ -512,9 +514,9 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
RequiredArgs required =
(MD->isVariadic() ? RequiredArgs(argTys.size()) : RequiredArgs::All);
- return arrangeLLVMFunctionInfo(
- GetReturnType(MD->getReturnType()), /*instanceMethod=*/false,
- /*chainCall=*/false, argTys, einfo, extParamInfos, required);
+ return arrangeLLVMFunctionInfo(GetReturnType(MD->getReturnType()),
+ FnInfoOpts::None, argTys, einfo, extParamInfos,
+ required);
}
const CGFunctionInfo &
@@ -523,9 +525,8 @@ CodeGenTypes::arrangeUnprototypedObjCMessageSend(QualType returnType,
auto argTypes = getArgTypesForCall(Context, args);
FunctionType::ExtInfo einfo;
- return arrangeLLVMFunctionInfo(
- GetReturnType(returnType), /*instanceMethod=*/false,
- /*chainCall=*/false, argTypes, einfo, {}, RequiredArgs::All);
+ return arrangeLLVMFunctionInfo(GetReturnType(returnType), FnInfoOpts::None,
+ argTypes, einfo, {}, RequiredArgs::All);
}
const CGFunctionInfo &
@@ -550,8 +551,7 @@ CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) {
assert(MD->isVirtual() && "only methods have thunks");
CanQual<FunctionProtoType> FTP = GetFormalType(MD);
CanQualType ArgTys[] = {DeriveThisType(MD->getParent(), MD)};
- return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false,
- /*chainCall=*/false, ArgTys,
+ return arrangeLLVMFunctionInfo(Context.VoidTy, FnInfoOpts::None, ArgTys,
FTP->getExtInfo(), {}, RequiredArgs(1));
}
@@ -570,9 +570,8 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD,
ArgTys.push_back(Context.IntTy);
CallingConv CC = Context.getDefaultCallingConvention(
/*IsVariadic=*/false, /*IsCXXMethod=*/true);
- return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/true,
- /*chainCall=*/false, ArgTys,
- FunctionType::ExtInfo(CC), {},
+ return arrangeLLVMFunctionInfo(Context.VoidTy, FnInfoOpts::IsInstanceMethod,
+ ArgTys, FunctionType::ExtInfo(CC), {},
RequiredArgs::All);
}
@@ -616,10 +615,10 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT,
SmallVector<CanQualType, 16> argTypes;
for (const auto &arg : args)
argTypes.push_back(CGT.getContext().getCanonicalParamType(arg.Ty));
+ FnInfoOpts opts = chainCall ? FnInfoOpts::IsChainCall : FnInfoOpts::None;
return CGT.arrangeLLVMFunctionInfo(GetReturnType(fnType->getReturnType()),
- /*instanceMethod=*/false, chainCall,
- argTypes, fnType->getExtInfo(), paramInfos,
- required);
+ opts, argTypes, fnType->getExtInfo(),
+ paramInfos, required);
}
/// Figure out the rules for calling a function with the given formal
@@ -650,8 +649,8 @@ CodeGenTypes::arrangeBlockFunctionDeclaration(const FunctionProtoType *proto,
auto argTypes = getArgTypesForDeclaration(Context, params);
return arrangeLLVMFunctionInfo(GetReturnType(proto->getReturnType()),
- /*instanceMethod*/ false, /*chainCall*/ false,
- argTypes, proto->getExtInfo(), paramInfos,
+ FnInfoOpts::None, argTypes,
+ proto->getExtInfo(), paramInfos,
RequiredArgs::forPrototypePlus(proto, 1));
}
@@ -662,10 +661,9 @@ CodeGenTypes::arrangeBuiltinFunctionCall(QualType resultType,
SmallVector<CanQualType, 16> argTypes;
for (const auto &Arg : args)
argTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
- return arrangeLLVMFunctionInfo(
- GetReturnType(resultType), /*instanceMethod=*/false,
- /*chainCall=*/false, argTypes, FunctionType::ExtInfo(),
- /*paramInfos=*/ {}, RequiredArgs::All);
+ return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None,
+ argTypes, FunctionType::ExtInfo(),
+ /*paramInfos=*/{}, RequiredArgs::All);
}
const CGFunctionInfo &
@@ -673,17 +671,17 @@ CodeGenTypes::arrangeBuiltinFunctionDeclaration(QualType resultType,
const FunctionArgList &args) {
auto argTypes = getArgTypesForDeclaration(Context, args);
- return arrangeLLVMFunctionInfo(
- GetReturnType(resultType), /*instanceMethod=*/false, /*chainCall=*/false,
- argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All);
+ return arrangeLLVMFunctionInfo(GetReturnType(resultType), FnInfoOpts::None,
+ argTypes, FunctionType::ExtInfo(), {},
+ RequiredArgs::All);
}
const CGFunctionInfo &
CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType,
ArrayRef<CanQualType> argTypes) {
- return arrangeLLVMFunctionInfo(
- resultType, /*instanceMethod=*/false, /*chainCall=*/false,
- argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All);
+ return arrangeLLVMFunctionInfo(resultType, FnInfoOpts::None, argTypes,
+ FunctionType::ExtInfo(), {},
+ RequiredArgs::All);
}
/// Arrange a call to a C++ method, passing the given arguments.
@@ -706,15 +704,15 @@ CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args,
auto argTypes = getArgTypesForCall(Context, args);
FunctionType::ExtInfo info = proto->getExtInfo();
- return arrangeLLVMFunctionInfo(
- GetReturnType(proto->getReturnType()), /*instanceMethod=*/true,
- /*chainCall=*/false, argTypes, info, paramInfos, required);
+ return arrangeLLVMFunctionInfo(GetReturnType(proto->getReturnType()),
+ FnInfoOpts::IsInstanceMethod, argTypes, info,
+ paramInfos, required);
}
const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() {
- return arrangeLLVMFunctionInfo(
- getContext().VoidTy, /*instanceMethod=*/false, /*chainCall=*/false,
- std::nullopt, FunctionType::ExtInfo(), {}, RequiredArgs::All);
+ return arrangeLLVMFunctionInfo(getContext().VoidTy, FnInfoOpts::None,
+ std::nullopt, FunctionType::ExtInfo(), {},
+ RequiredArgs::All);
}
const CGFunctionInfo &
@@ -734,12 +732,15 @@ CodeGenTypes::arrangeCall(const CGFunctionInfo &signature,
auto argTypes = getArgTypesForCall(Context, args);
assert(signature.getRequiredArgs().allowsOptionalArgs());
- return arrangeLLVMFunctionInfo(signature.getReturnType(),
- signature.isInstanceMethod(),
- signature.isChainCall(),
- argTypes,
- signature.getExtInfo(),
- paramInfos,
+ FnInfoOpts opts = FnInfoOpts::None;
+ if (signature.isInstanceMethod())
+ opts |= FnInfoOpts::IsInstanceMethod;
+ if (signature.isChainCall())
+ opts |= FnInfoOpts::IsChainCall;
+ if (signature.isDelegateCall())
+ opts |= FnInfoOpts::IsDelegateCall;
+ return arrangeLLVMFunctionInfo(signature.getReturnType(), opts, argTypes,
+ signature.getExtInfo(), paramInfos,
signature.getRequiredArgs());
}
@@ -752,21 +753,24 @@ void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI);
/// Arrange the argument and result information for an abstract value
/// of a given function type. This is the method which all of the
/// above functions ultimately defer to.
-const CGFunctionInfo &
-CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
- bool instanceMethod,
- bool chainCall,
- ArrayRef<CanQualType> argTypes,
- FunctionType::ExtInfo info,
- ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
- RequiredArgs required) {
+const CGFunctionInfo &CodeGenTypes::arrangeLLVMFunctionInfo(
+ CanQualType resultType, FnInfoOpts opts, ArrayRef<CanQualType> argTypes,
+ FunctionType::ExtInfo info,
+ ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
+ RequiredArgs required) {
assert(llvm::all_of(argTypes,
[](CanQualType T) { return T.isCanonicalAsParam(); }));
// Lookup or create unique function info.
llvm::FoldingSetNodeID ID;
- CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, paramInfos,
- required, resultType, argTypes);
+ bool isInstanceMethod =
+ (opts & FnInfoOpts::IsInstanceMethod) == FnInfoOpts::IsInstanceMethod;
+ bool isChainCall =
+ (opts & FnInfoOpts::IsChainCall) == FnInfoOpts::IsChainCall;
+ bool isDelegateCall =
+ (opts & FnInfoOpts::IsDelegateCall) == FnInfoOpts::IsDelegateCall;
+ CGFunctionInfo::Profile(ID, isInstanceMethod, isChainCall, isDelegateCall,
+ info, paramInfos, required, resultType, argTypes);
void *insertPos = nullptr;
CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos);
@@ -776,8 +780,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
// Construct the function info. We co-allocate the ArgInfos.
- FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info,
- paramInfos, resultType, argTypes, required);
+ FI = CGFunctionInfo::create(CC, isInstanceMethod, isChainCall, isDelegateCall,
+ info, paramInfos, resultType, argTypes, required);
FunctionInfos.InsertNode(FI, insertPos);
bool inserted = FunctionsBeingProcessed.insert(FI).second;
@@ -812,9 +816,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
return *FI;
}
-CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
- bool instanceMethod,
- bool chainCall,
+CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, bool instanceMethod,
+ bool chainCall, bool delegateCall,
const FunctionType::ExtInfo &info,
ArrayRef<ExtParameterInfo> paramInfos,
CanQualType resultType,
@@ -834,6 +837,7 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
FI->ASTCallingConvention = info.getCC();
FI->InstanceMethod = instanceMethod;
FI->ChainCall = chainCall;
+ FI->DelegateCall = delegateCall;
FI->CmseNSCall = info.getCmseNSCall();
FI->NoReturn = info.getNoReturn();
FI->ReturnsRetained = info.getProducesResult();
@@ -1376,7 +1380,7 @@ static void CreateCoercedStore(llvm::Value *Src,
llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy);
if (SrcPtrTy && DstPtrTy &&
SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) {
- Src = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DstTy);
+ Src = CGF.Builder.CreateAddrSpaceCast(Src, DstTy);
CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
return;
}
@@ -1762,6 +1766,15 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
if (!isUnresolvedExceptionSpec(FPT->getExceptionSpecType()) &&
FPT->isNothrow())
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
+
+ if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask)
+ FuncAttrs.addAttribute("aarch64_pstate_sm_enabled");
+ if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask)
+ FuncAttrs.addAttribute("aarch64_pstate_sm_compatible");
+ if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask)
+ FuncAttrs.addAttribute("aarch64_pstate_za_shared");
+ if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateZAPreservedMask)
+ FuncAttrs.addAttribute("aarch64_pstate_za_preserved");
}
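
A hedged sketch of the source-level SME markers these IR attributes are derived from, assuming the ACLE keyword spellings current at the time of this change:

    void f() __arm_streaming;            // -> aarch64_pstate_sm_enabled
    void g() __arm_streaming_compatible; // -> aarch64_pstate_sm_compatible
    void h() __arm_shared_za;            // -> aarch64_pstate_za_shared
    void i() __arm_preserves_za;         // -> aarch64_pstate_za_preserved
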
static void AddAttributesFromAssumes(llvm::AttrBuilder &FuncAttrs,
@@ -1992,11 +2005,45 @@ static void getTrivialDefaultFunctionAttributes(
}
}
-/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as
-/// though we had emitted it ourselves. We remove any attributes on F that
-/// conflict with the attributes we add here.
-static void mergeDefaultFunctionDefinitionAttributes(
- llvm::Function &F, const CodeGenOptions CodeGenOpts,
+/// Merges `target-features` from \p TargetOpts and \p F, and sets the result
+/// in \p FuncAttr.
+/// * features from \p F are always kept
+/// * a feature from \p TargetOpts is kept only if neither it nor its opposite
+///   is present in \p F
+static void
+overrideFunctionFeaturesWithTargetFeatures(llvm::AttrBuilder &FuncAttr,
+ const llvm::Function &F,
+ const TargetOptions &TargetOpts) {
+ auto FFeatures = F.getFnAttribute("target-features");
+
+ llvm::StringSet<> MergedNames;
+ SmallVector<StringRef> MergedFeatures;
+ MergedFeatures.reserve(TargetOpts.Features.size());
+
+ auto AddUnmergedFeatures = [&](auto &&FeatureRange) {
+ for (StringRef Feature : FeatureRange) {
+ if (Feature.empty())
+ continue;
+ assert(Feature[0] == '+' || Feature[0] == '-');
+ StringRef Name = Feature.drop_front(1);
+ bool Merged = !MergedNames.insert(Name).second;
+ if (!Merged)
+ MergedFeatures.push_back(Feature);
+ }
+ };
+
+ if (FFeatures.isValid())
+ AddUnmergedFeatures(llvm::split(FFeatures.getValueAsString(), ','));
+ AddUnmergedFeatures(TargetOpts.Features);
+
+ if (!MergedFeatures.empty()) {
+ llvm::sort(MergedFeatures);
+ FuncAttr.addAttribute("target-features", llvm::join(MergedFeatures, ","));
+ }
+}
+
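
A worked example of the merge semantics (the feature names are illustrative only):

    // F:          "target-features"="+sse4.2,-avx"
    // TargetOpts: {"+avx2", "+avx", "+sse2"}
    // F's entries are kept as-is; "+avx" is dropped because the name "avx"
    // already occurs in F (as "-avx"); "+avx2" and "+sse2" survive. Sorted:
    //   "target-features"="+avx2,+sse2,+sse4.2,-avx"
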
+void CodeGen::mergeDefaultFunctionDefinitionAttributes(
+ llvm::Function &F, const CodeGenOptions &CodeGenOpts,
const LangOptions &LangOpts, const TargetOptions &TargetOpts,
bool WillInternalize) {
@@ -2052,16 +2099,10 @@ static void mergeDefaultFunctionDefinitionAttributes(
F.removeFnAttrs(AttrsToRemove);
addDenormalModeAttrs(Merged, MergedF32, FuncAttrs);
- F.addFnAttrs(FuncAttrs);
-}
-void clang::CodeGen::mergeDefaultFunctionDefinitionAttributes(
- llvm::Function &F, const CodeGenOptions CodeGenOpts,
- const LangOptions &LangOpts, const TargetOptions &TargetOpts,
- bool WillInternalize) {
+ overrideFunctionFeaturesWithTargetFeatures(FuncAttrs, F, TargetOpts);
- ::mergeDefaultFunctionDefinitionAttributes(F, CodeGenOpts, LangOpts,
- TargetOpts, WillInternalize);
+ F.addFnAttrs(FuncAttrs);
}
void CodeGenModule::getTrivialDefaultFunctionAttributes(
@@ -2084,23 +2125,6 @@ void CodeGenModule::getDefaultFunctionAttributes(StringRef Name,
addMergableDefaultFunctionAttributes(CodeGenOpts, FuncAttrs);
}
-void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) {
- llvm::AttrBuilder FuncAttrs(F.getContext());
- getDefaultFunctionAttributes(F.getName(), F.hasOptNone(),
- /* AttrOnCallSite = */ false, FuncAttrs);
- // TODO: call GetCPUAndFeaturesAttributes?
- F.addFnAttrs(FuncAttrs);
-}
-
-/// Apply default attributes to \p F, accounting for merge semantics of
-/// attributes that should not overwrite existing attributes.
-void CodeGenModule::mergeDefaultFunctionDefinitionAttributes(
- llvm::Function &F, bool WillInternalize) {
- ::mergeDefaultFunctionDefinitionAttributes(F, getCodeGenOpts(), getLangOpts(),
- getTarget().getTargetOpts(),
- WillInternalize);
-}
-
void CodeGenModule::addDefaultFunctionDefinitionAttributes(
llvm::AttrBuilder &attrs) {
getDefaultFunctionAttributes(/*function name*/ "", /*optnone*/ false,
@@ -2148,7 +2172,8 @@ static bool DetermineNoUndef(QualType QTy, CodeGenTypes &Types,
const llvm::DataLayout &DL, const ABIArgInfo &AI,
bool CheckCoerce = true) {
llvm::Type *Ty = Types.ConvertTypeForMem(QTy);
- if (AI.getKind() == ABIArgInfo::Indirect)
+ if (AI.getKind() == ABIArgInfo::Indirect ||
+ AI.getKind() == ABIArgInfo::IndirectAliased)
return true;
if (AI.getKind() == ABIArgInfo::Extend)
return true;
@@ -2247,6 +2272,17 @@ static llvm::FPClassTest getNoFPClassTestMask(const LangOptions &LangOpts) {
return Mask;
}
+void CodeGenModule::AdjustMemoryAttribute(StringRef Name,
+ CGCalleeInfo CalleeInfo,
+ llvm::AttributeList &Attrs) {
+ if (Attrs.getMemoryEffects().getModRef() == llvm::ModRefInfo::NoModRef) {
+ Attrs = Attrs.removeFnAttribute(getLLVMContext(), llvm::Attribute::Memory);
+ llvm::Attribute MemoryAttr = llvm::Attribute::getWithMemoryEffects(
+ getLLVMContext(), llvm::MemoryEffects::writeOnly());
+ Attrs = Attrs.addFnAttribute(getLLVMContext(), MemoryAttr);
+ }
+}
+
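
A hedged sketch of the effect at a call site, in IR terms:

    // Before: call double @cos(double %x) #0   ; #0 = { memory(none) ... }
    // After:  call double @cos(double %x) #1   ; #1 = { memory(write) ... }
    // NoModRef would let the optimizer treat the call like the intrinsic;
    // widening to writeOnly() keeps the library call.
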
/// Construct the IR attribute list of a function or call.
///
/// When adding an attribute, please consider where it should be handled:
@@ -2364,7 +2400,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
// gcc specifies that 'pure' functions cannot have infinite loops.
FuncAttrs.addAttribute(llvm::Attribute::WillReturn);
} else if (TargetDecl->hasAttr<NoAliasAttr>()) {
- FuncAttrs.addMemoryAttr(llvm::MemoryEffects::argMemOnly());
+ FuncAttrs.addMemoryAttr(llvm::MemoryEffects::inaccessibleOrArgMemOnly());
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}
if (TargetDecl->hasAttr<RestrictAttr>())
@@ -2398,10 +2434,21 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
// to the compiler that the global work-size be a multiple of
// the work-group size specified to clEnqueueNDRangeKernel
// (i.e. work groups are uniform).
- FuncAttrs.addAttribute("uniform-work-group-size",
- llvm::toStringRef(CodeGenOpts.UniformWGSize));
+ FuncAttrs.addAttribute(
+ "uniform-work-group-size",
+ llvm::toStringRef(getLangOpts().OffloadUniformBlock));
}
}
+
+ if (TargetDecl->hasAttr<CUDAGlobalAttr>() &&
+ getLangOpts().OffloadUniformBlock)
+ FuncAttrs.addAttribute("uniform-work-group-size", "true");
+
+ if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>())
+ FuncAttrs.addAttribute("aarch64_pstate_sm_body");
+
+ if (TargetDecl->hasAttr<ArmNewZAAttr>())
+ FuncAttrs.addAttribute("aarch64_pstate_za_new");
}
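
A hedged sketch of the CUDA side, assuming this is keyed to the -f[no-]offload-uniform-block driver option:

    // With uniform block sizes assumed (the CUDA default), the kernel below
    // should get "uniform-work-group-size"="true" on its IR function.
    __global__ void fill(float *p) { p[threadIdx.x] = 0.f; }
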
// Attach "no-builtins" attributes to:
@@ -2593,7 +2640,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
llvm::AttrBuilder Attrs(getLLVMContext());
QualType ThisTy =
- FI.arg_begin()->type.castAs<PointerType>()->getPointeeType();
+ FI.arg_begin()->type.getTypePtr()->getPointeeType();
if (!CodeGenOpts.NullPointerIsValid &&
getTypes().getTargetAddressSpace(FI.arg_begin()->type) == 0) {
@@ -2672,7 +2719,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
auto *Decl = ParamType->getAsRecordDecl();
if (CodeGenOpts.PassByValueIsNoAlias && Decl &&
- Decl->getArgPassingRestrictions() == RecordDecl::APK_CanPassInRegs)
+ Decl->getArgPassingRestrictions() ==
+ RecordArgPassingKind::CanPassInRegs)
// When calling the function, the pointer passed in will be the only
// reference to the underlying object. Mark it accordingly.
Attrs.addAttribute(llvm::Attribute::NoAlias);
@@ -3015,7 +3063,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// indicates dereferenceability, and if the size is constant we can
// use the dereferenceable attribute (which requires the size in
// bytes).
- if (ArrTy->getSizeModifier() == ArrayType::Static) {
+ if (ArrTy->getSizeModifier() == ArraySizeModifier::Static) {
QualType ETy = ArrTy->getElementType();
llvm::Align Alignment =
CGM.getNaturalTypeAlignment(ETy).getAsAlign();
@@ -3039,7 +3087,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// For C99 VLAs with the static keyword, we don't know the size so
// we can't use the dereferenceable attribute, but in addrspace(0)
// we know that it must be nonnull.
- if (ArrTy->getSizeModifier() == VariableArrayType::Static) {
+ if (ArrTy->getSizeModifier() == ArraySizeModifier::Static) {
QualType ETy = ArrTy->getElementType();
llvm::Align Alignment =
CGM.getNaturalTypeAlignment(ETy).getAsAlign();
@@ -3400,9 +3448,9 @@ static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF,
const VarDecl *self = method->getSelfDecl();
if (!self->getType().isConstQualified()) return nullptr;
- // Look for a retain call.
- llvm::CallInst *retainCall =
- dyn_cast<llvm::CallInst>(result->stripPointerCasts());
+ // Look for a retain call. Note: we deliberately do not use stripPointerCasts
+ // here, since it looks through functions with the 'returned' argument
+ // attribute and would cause us to miss the retain.
+ llvm::CallInst *retainCall = dyn_cast<llvm::CallInst>(result);
if (!retainCall || retainCall->getCalledOperand() !=
CGF.CGM.getObjCEntrypoints().objc_retain)
return nullptr;
@@ -3459,7 +3507,9 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) {
return nullptr;
// These aren't actually possible for non-coerced returns, and we
// only care about non-coerced returns on this code path.
- assert(!SI->isAtomic() && !SI->isVolatile());
+ // All memory instructions inside a __try block are volatile.
+ assert(!SI->isAtomic() &&
+ (!SI->isVolatile() || CGF.currentFunctionUsesSEHTry()));
return SI;
};
// If there are multiple uses of the return-value slot, just check
@@ -3989,10 +4039,6 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
QualType type = param->getType();
- if (isInAllocaArgument(CGM.getCXXABI(), type)) {
- CGM.ErrorUnsupported(param, "forwarded non-trivially copyable parameter");
- }
-
// GetAddrOfLocalVar returns a pointer-to-pointer for references,
// but the argument needs to be the original pointer.
if (type->isReferenceType()) {
@@ -4262,15 +4308,13 @@ void CallArgList::allocateArgumentMemory(CodeGenFunction &CGF) {
assert(!StackBase);
// Save the stack.
- llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stacksave);
- StackBase = CGF.Builder.CreateCall(F, {}, "inalloca.save");
+ StackBase = CGF.Builder.CreateStackSave("inalloca.save");
}
void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const {
if (StackBase) {
// Restore the stack after the call.
- llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore);
- CGF.Builder.CreateCall(F, StackBase);
+ CGF.Builder.CreateStackRestore(StackBase);
}
}
@@ -5105,7 +5149,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
"indirect argument must be in alloca address space");
bool NeedCopy = false;
-
if (Addr.getAlignment() < Align &&
llvm::getOrEnforceKnownAlignment(V, Align.getAsAlign(), *TD) <
Align.getAsAlign()) {
@@ -5114,12 +5157,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
auto LV = I->getKnownLValue();
auto AS = LV.getAddressSpace();
- if (!ArgInfo.getIndirectByVal() ||
+ bool isByValOrRef =
+ ArgInfo.isIndirectAliased() || ArgInfo.getIndirectByVal();
+
+ if (!isByValOrRef ||
(LV.getAlignment() < getContext().getTypeAlignInChars(I->Ty))) {
NeedCopy = true;
}
if (!getLangOpts().OpenCL) {
- if ((ArgInfo.getIndirectByVal() &&
+ if ((isByValOrRef &&
(AS != LangAS::Default &&
AS != CGM.getASTAllocaAddressSpace()))) {
NeedCopy = true;
@@ -5127,7 +5173,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
}
// For OpenCL even if RV is located in default or alloca address space
// we don't want to perform address space cast for it.
- else if ((ArgInfo.getIndirectByVal() &&
+ else if ((isByValOrRef &&
Addr.getType()->getAddressSpace() != IRFuncTy->
getParamType(FirstIRArg)->getPointerAddressSpace())) {
NeedCopy = true;
@@ -5244,30 +5290,50 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
llvm::Type *SrcTy = Src.getElementType();
- uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
- uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(STy);
+ llvm::TypeSize SrcTypeSize =
+ CGM.getDataLayout().getTypeAllocSize(SrcTy);
+ llvm::TypeSize DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy);
+ if (SrcTypeSize.isScalable()) {
+ assert(STy->containsHomogeneousScalableVectorTypes() &&
+ "ABI only supports structure with homogeneous scalable vector "
+ "type");
+ assert(SrcTypeSize == DstTypeSize &&
+ "Only allow non-fractional movement of structure with "
+ "homogeneous scalable vector type");
+ assert(NumIRArgs == STy->getNumElements());
- // If the source type is smaller than the destination type of the
- // coerce-to logic, copy the source value into a temp alloca the size
- // of the destination type to allow loading all of it. The bits past
- // the source value are left undef.
- if (SrcSize < DstSize) {
- Address TempAlloca
- = CreateTempAlloca(STy, Src.getAlignment(),
- Src.getName() + ".coerce");
- Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
- Src = TempAlloca;
+ llvm::Value *StoredStructValue =
+ Builder.CreateLoad(Src, Src.getName() + ".tuple");
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ llvm::Value *Extract = Builder.CreateExtractValue(
+ StoredStructValue, i, Src.getName() + ".extract" + Twine(i));
+ IRCallArgs[FirstIRArg + i] = Extract;
+ }
} else {
- Src = Src.withElementType(STy);
- }
+ uint64_t SrcSize = SrcTypeSize.getFixedValue();
+ uint64_t DstSize = DstTypeSize.getFixedValue();
- assert(NumIRArgs == STy->getNumElements());
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Address EltPtr = Builder.CreateStructGEP(Src, i);
- llvm::Value *LI = Builder.CreateLoad(EltPtr);
- if (ArgHasMaybeUndefAttr)
- LI = Builder.CreateFreeze(LI);
- IRCallArgs[FirstIRArg + i] = LI;
+ // If the source type is smaller than the destination type of the
+ // coerce-to logic, copy the source value into a temp alloca the size
+ // of the destination type to allow loading all of it. The bits past
+ // the source value are left undef.
+ if (SrcSize < DstSize) {
+ Address TempAlloca = CreateTempAlloca(STy, Src.getAlignment(),
+ Src.getName() + ".coerce");
+ Builder.CreateMemCpy(TempAlloca, Src, SrcSize);
+ Src = TempAlloca;
+ } else {
+ Src = Src.withElementType(STy);
+ }
+
+ assert(NumIRArgs == STy->getNumElements());
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Address EltPtr = Builder.CreateStructGEP(Src, i);
+ llvm::Value *LI = Builder.CreateLoad(EltPtr);
+ if (ArgHasMaybeUndefAttr)
+ LI = Builder.CreateFreeze(LI);
+ IRCallArgs[FirstIRArg + i] = LI;
+ }
}
} else {
// In the simple case, just pass the coerced loaded value.
@@ -5442,11 +5508,18 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
/*AttrOnCallSite=*/true,
/*IsThunk=*/false);
- if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl))
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl)) {
if (FD->hasAttr<StrictFPAttr>())
// All calls within a strictfp function are marked strictfp
Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::StrictFP);
+ // If -ffast-math is enabled and the function is guarded by
+ // '__attribute__((optnone))', adjust the memory attribute so the backend
+ // emits the library call instead of the intrinsic.
+ if (FD->hasAttr<OptimizeNoneAttr>() && getLangOpts().FastMath)
+ CGM.AdjustMemoryAttribute(CalleePtr->getName(), Callee.getAbstractInfo(),
+ Attrs);
+ }
// Add call-site nomerge attribute if exists.
if (InNoMergeAttributedStmt)
Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoMerge);
@@ -5535,6 +5608,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
BundleList);
EmitBlock(Cont);
}
+ if (CI->getCalledFunction() && CI->getCalledFunction()->hasName() &&
+ CI->getCalledFunction()->getName().startswith("_Z4sqrt")) {
+ SetSqrtFPAccuracy(CI);
+ }
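
The prefix matches the Itanium manglings of the sqrt overloads, e.g. _Z4sqrtf for sqrt(float) and _Z4sqrtd for sqrt(double); a hedged sketch:

    // Calls to a C++ sqrt overload (rather than __builtin_sqrt) presumably now
    // receive the same !fpmath accuracy metadata that SetSqrtFPAccuracy
    // attaches for the builtin. (<cmath> include omitted for brevity.)
    float root(float x) { return sqrt(x); }
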
if (callOrInvoke)
*callOrInvoke = CI;
@@ -5765,9 +5842,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
DestIsVolatile = false;
}
- // If the value is offset in memory, apply the offset now.
- Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
- CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
+ // An empty record can overlap other data (if declared with
+ // no_unique_address); omit the store for such types - as there is no
+ // actual data to store.
+ if (!isEmptyRecord(getContext(), RetTy, true)) {
+ // If the value is offset in memory, apply the offset now.
+ Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
+ CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
+ }
return convertTempToRValue(DestPtr, RetTy, SourceLocation());
}
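
The situation this guards against, as a hedged sketch:

    struct Empty {};
    struct Wrap { [[no_unique_address]] Empty e; int x; };
    // A call whose result initializes Wrap::e must not store its coerced
    // return value: e may share storage with x, and the store could clobber it.
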
diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h
index eaaf10c4eec6..aee86a3242fd 100644
--- a/clang/lib/CodeGen/CGCall.h
+++ b/clang/lib/CodeGen/CGCall.h
@@ -256,7 +256,7 @@ public:
/// arguments in a call.
class CallArgList : public SmallVector<CallArg, 8> {
public:
- CallArgList() : StackBase(nullptr) {}
+ CallArgList() = default;
struct Writeback {
/// The original argument. Note that the argument l-value
@@ -342,7 +342,7 @@ private:
SmallVector<CallArgCleanup, 1> CleanupsToDeactivate;
/// The stacksave call. It dominates all of the argument evaluation.
- llvm::CallInst *StackBase;
+ llvm::CallInst *StackBase = nullptr;
};
/// FunctionArgList - Type for representing both the decl and type
@@ -375,14 +375,58 @@ public:
bool isExternallyDestructed() const { return IsExternallyDestructed; }
};
-/// Helper to add attributes to \p F according to the CodeGenOptions and
-/// LangOptions without requiring a CodeGenModule to be constructed.
+/// Adds attributes to \p F according to our \p CodeGenOpts and \p LangOpts, as
+/// though we had emitted it ourselves. We remove any attributes on F that
+/// conflict with the attributes we add here.
+///
+/// This is useful for adding attrs to bitcode modules that you want to link
+/// with but don't control, such as CUDA's libdevice. When linking with such
+/// a bitcode library, you might want to set e.g. its functions'
+/// "unsafe-fp-math" attribute to match the attr of the functions you're
+/// codegen'ing. Otherwise, LLVM will interpret the bitcode module's lack of
+/// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM
+/// will propagate unsafe-fp-math=false up to every transitive caller of a
+/// function in the bitcode library!
+///
+/// With the exception of fast-math attrs, this will only make the attributes
+/// on the function more conservative. But it's unsafe to call this on a
+/// function which relies on particular fast-math attributes for correctness.
+/// It's up to you to ensure that this is safe.
void mergeDefaultFunctionDefinitionAttributes(llvm::Function &F,
- const CodeGenOptions CodeGenOpts,
+ const CodeGenOptions &CodeGenOpts,
const LangOptions &LangOpts,
const TargetOptions &TargetOpts,
bool WillInternalize);
+enum class FnInfoOpts {
+ None = 0,
+ IsInstanceMethod = 1 << 0,
+ IsChainCall = 1 << 1,
+ IsDelegateCall = 1 << 2,
+};
+
+inline FnInfoOpts operator|(FnInfoOpts A, FnInfoOpts B) {
+ return static_cast<FnInfoOpts>(
+ static_cast<std::underlying_type_t<FnInfoOpts>>(A) |
+ static_cast<std::underlying_type_t<FnInfoOpts>>(B));
+}
+
+inline FnInfoOpts operator&(FnInfoOpts A, FnInfoOpts B) {
+ return static_cast<FnInfoOpts>(
+ static_cast<std::underlying_type_t<FnInfoOpts>>(A) &
+ static_cast<std::underlying_type_t<FnInfoOpts>>(B));
+}
+
+inline FnInfoOpts operator|=(FnInfoOpts A, FnInfoOpts B) {
+ A = A | B;
+ return A;
+}
+
+inline FnInfoOpts operator&=(FnInfoOpts A, FnInfoOpts B) {
+ A = A & B;
+ return A;
+}
+
} // end namespace CodeGen
} // end namespace clang
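
Call sites migrate from the old boolean pair to the flag set; a minimal usage sketch (isDelegate is illustrative):

    FnInfoOpts opts = FnInfoOpts::IsInstanceMethod;
    if (isDelegate)
      opts |= FnInfoOpts::IsDelegateCall;
    const CGFunctionInfo &FI = CGT.arrangeLLVMFunctionInfo(
        retTy, opts, argTypes, extInfo, paramInfos, required);
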
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index 93e7b54fca04..d18f186ce5b4 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -28,6 +28,7 @@
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Transforms/Utils/SanitizerStats.h"
#include <optional>
@@ -138,7 +139,7 @@ Address CodeGenFunction::LoadCXXThisAddress() {
CXXThisAlignment = CGM.getClassPointerAlignment(MD->getParent());
}
- llvm::Type *Ty = ConvertType(MD->getThisType()->getPointeeType());
+ llvm::Type *Ty = ConvertType(MD->getFunctionObjectParameterType());
return Address(LoadCXXThis(), Ty, CXXThisAlignment, KnownNonNull);
}
@@ -403,11 +404,8 @@ CodeGenFunction::GetAddressOfDerivedClass(Address BaseAddr,
assert(PathBegin != PathEnd && "Base path should not be empty!");
QualType DerivedTy =
- getContext().getCanonicalType(getContext().getTagDeclType(Derived));
- unsigned AddrSpace = BaseAddr.getAddressSpace();
+ getContext().getCanonicalType(getContext().getTagDeclType(Derived));
llvm::Type *DerivedValueTy = ConvertType(DerivedTy);
- llvm::Type *DerivedPtrTy =
- llvm::PointerType::get(getLLVMContext(), AddrSpace);
llvm::Value *NonVirtualOffset =
CGM.GetNonVirtualBaseClassOffset(Derived, PathBegin, PathEnd);
@@ -432,13 +430,10 @@ CodeGenFunction::GetAddressOfDerivedClass(Address BaseAddr,
}
// Apply the offset.
- llvm::Value *Value = Builder.CreateBitCast(BaseAddr.getPointer(), Int8PtrTy);
+ llvm::Value *Value = BaseAddr.getPointer();
Value = Builder.CreateInBoundsGEP(
Int8Ty, Value, Builder.CreateNeg(NonVirtualOffset), "sub.ptr");
- // Just cast.
- Value = Builder.CreateBitCast(Value, DerivedPtrTy);
-
// Produce a PHI if we had a null-check.
if (NullCheckValue) {
Builder.CreateBr(CastEnd);
@@ -516,7 +511,7 @@ namespace {
const CXXDestructorDecl *D = BaseClass->getDestructor();
// We are already inside a destructor, so presumably the object being
// destroyed should have the expected type.
- QualType ThisTy = D->getThisObjectType();
+ QualType ThisTy = D->getFunctionObjectParameterType();
Address Addr =
CGF.GetAddressOfDirectBaseInCompleteClass(CGF.LoadCXXThisAddress(),
DerivedClass, BaseClass,
@@ -1297,10 +1292,10 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
assert(BaseCtorContinueBB);
}
- llvm::Value *const OldThis = CXXThisValue;
for (; B != E && (*B)->isBaseInitializer() && (*B)->isBaseVirtual(); B++) {
if (!ConstructVBases)
continue;
+ SaveAndRestore ThisRAII(CXXThisValue);
if (CGM.getCodeGenOpts().StrictVTablePointers &&
CGM.getCodeGenOpts().OptimizationLevel > 0 &&
isInitializerOfDynamicClass(*B))
@@ -1317,7 +1312,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
// Then, non-virtual base initializers.
for (; B != E && (*B)->isBaseInitializer(); B++) {
assert(!(*B)->isBaseVirtual());
-
+ SaveAndRestore ThisRAII(CXXThisValue);
if (CGM.getCodeGenOpts().StrictVTablePointers &&
CGM.getCodeGenOpts().OptimizationLevel > 0 &&
isInitializerOfDynamicClass(*B))
@@ -1325,8 +1320,6 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD,
EmitBaseInitializer(*this, ClassDecl, *B);
}
- CXXThisValue = OldThis;
-
InitializeVTablePointers(ClassDecl);
// And finally, initialize class members.
@@ -1462,7 +1455,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
RunCleanupsScope DtorEpilogue(*this);
EnterDtorCleanups(Dtor, Dtor_Deleting);
if (HaveInsertPoint()) {
- QualType ThisTy = Dtor->getThisObjectType();
+ QualType ThisTy = Dtor->getFunctionObjectParameterType();
EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false,
/*Delegating=*/false, LoadCXXThisAddress(), ThisTy);
}
@@ -1496,7 +1489,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
EnterDtorCleanups(Dtor, Dtor_Complete);
if (!isTryBody) {
- QualType ThisTy = Dtor->getThisObjectType();
+ QualType ThisTy = Dtor->getFunctionObjectParameterType();
EmitCXXDestructorCall(Dtor, Dtor_Base, /*ForVirtualBase=*/false,
/*Delegating=*/false, LoadCXXThisAddress(), ThisTy);
break;
@@ -1676,8 +1669,7 @@ namespace {
CodeGenFunction::SanitizerScope SanScope(&CGF);
// Pass in void pointer and size of region as arguments to runtime
// function
- SmallVector<llvm::Value *, 2> Args = {
- CGF.Builder.CreateBitCast(Ptr, CGF.VoidPtrTy)};
+ SmallVector<llvm::Value *, 2> Args = {Ptr};
SmallVector<llvm::Type *, 2> ArgTypes = {CGF.VoidPtrTy};
if (PoisonSize.has_value()) {
@@ -1756,10 +1748,8 @@ namespace {
llvm::ConstantInt *OffsetSizePtr =
llvm::ConstantInt::get(CGF.SizeTy, PoisonStart.getQuantity());
- llvm::Value *OffsetPtr = CGF.Builder.CreateGEP(
- CGF.Int8Ty,
- CGF.Builder.CreateBitCast(CGF.LoadCXXThis(), CGF.Int8PtrTy),
- OffsetSizePtr);
+ llvm::Value *OffsetPtr =
+ CGF.Builder.CreateGEP(CGF.Int8Ty, CGF.LoadCXXThis(), OffsetSizePtr);
CharUnits PoisonEnd;
if (EndIndex >= Layout.getFieldCount()) {
@@ -2123,8 +2113,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
CallArgList Args;
Address This = ThisAVS.getAddress();
LangAS SlotAS = ThisAVS.getQualifiers().getAddressSpace();
- QualType ThisType = D->getThisType();
- LangAS ThisAS = ThisType.getTypePtr()->getPointeeType().getAddressSpace();
+ LangAS ThisAS = D->getFunctionObjectParameterType().getAddressSpace();
llvm::Value *ThisPtr = This.getPointer();
if (SlotAS != ThisAS) {
@@ -2463,7 +2452,7 @@ namespace {
void Emit(CodeGenFunction &CGF, Flags flags) override {
// We are calling the destructor from within the constructor.
// Therefore, "this" should have the expected type.
- QualType ThisTy = Dtor->getThisObjectType();
+ QualType ThisTy = Dtor->getFunctionObjectParameterType();
CGF.EmitCXXDestructorCall(Dtor, Type, /*ForVirtualBase=*/false,
/*Delegating=*/true, Addr, ThisTy);
}
@@ -2736,7 +2725,6 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD,
llvm::Value *TypeId =
llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD);
- llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy);
// If we already know that the call has hidden LTO visibility, emit
// @llvm.type.test(). Otherwise emit @llvm.public.type.test(), which WPD
// will convert to @llvm.type.test() if we assert at link time that we have
@@ -2745,7 +2733,7 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD,
? llvm::Intrinsic::type_test
: llvm::Intrinsic::public_type_test;
llvm::Value *TypeTest =
- Builder.CreateCall(CGM.getIntrinsic(IID), {CastedVTable, TypeId});
+ Builder.CreateCall(CGM.getIntrinsic(IID), {VTable, TypeId});
Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::assume), TypeTest);
}
}
@@ -2849,9 +2837,8 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0));
llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD);
- llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy);
llvm::Value *TypeTest = Builder.CreateCall(
- CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, TypeId});
+ CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, TypeId});
llvm::Constant *StaticData[] = {
llvm::ConstantInt::get(Int8Ty, TCK),
@@ -2861,7 +2848,7 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD);
if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) {
- EmitCfiSlowPathCheck(M, TypeTest, CrossDsoTypeId, CastedVTable, StaticData);
+ EmitCfiSlowPathCheck(M, TypeTest, CrossDsoTypeId, VTable, StaticData);
return;
}
@@ -2874,9 +2861,9 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
CGM.getLLVMContext(),
llvm::MDString::get(CGM.getLLVMContext(), "all-vtables"));
llvm::Value *ValidVtable = Builder.CreateCall(
- CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, AllVtables});
+ CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables});
EmitCheck(std::make_pair(TypeTest, M), SanitizerHandler::CFICheckFail,
- StaticData, {CastedVTable, ValidVtable});
+ StaticData, {VTable, ValidVtable});
}
bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) {
@@ -2907,11 +2894,9 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad(
CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0));
llvm::Value *TypeId = llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD);
- llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy);
llvm::Value *CheckedLoad = Builder.CreateCall(
CGM.getIntrinsic(llvm::Intrinsic::type_checked_load),
- {CastedVTable, llvm::ConstantInt::get(Int32Ty, VTableByteOffset),
- TypeId});
+ {VTable, llvm::ConstantInt::get(Int32Ty, VTableByteOffset), TypeId});
llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1);
std::string TypeName = RD->getQualifiedNameAsString();
@@ -2927,14 +2912,16 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad(
}
void CodeGenFunction::EmitForwardingCallToLambda(
- const CXXMethodDecl *callOperator,
- CallArgList &callArgs) {
+ const CXXMethodDecl *callOperator, CallArgList &callArgs,
+ const CGFunctionInfo *calleeFnInfo, llvm::Constant *calleePtr) {
// Get the address of the call operator.
- const CGFunctionInfo &calleeFnInfo =
- CGM.getTypes().arrangeCXXMethodDeclaration(callOperator);
- llvm::Constant *calleePtr =
- CGM.GetAddrOfFunction(GlobalDecl(callOperator),
- CGM.getTypes().GetFunctionType(calleeFnInfo));
+ if (!calleeFnInfo)
+ calleeFnInfo = &CGM.getTypes().arrangeCXXMethodDeclaration(callOperator);
+
+ if (!calleePtr)
+ calleePtr =
+ CGM.GetAddrOfFunction(GlobalDecl(callOperator),
+ CGM.getTypes().GetFunctionType(*calleeFnInfo));
// Prepare the return slot.
const FunctionProtoType *FPT =
@@ -2942,8 +2929,8 @@ void CodeGenFunction::EmitForwardingCallToLambda(
QualType resultType = FPT->getReturnType();
ReturnValueSlot returnSlot;
if (!resultType->isVoidType() &&
- calleeFnInfo.getReturnInfo().getKind() == ABIArgInfo::Indirect &&
- !hasScalarEvaluationKind(calleeFnInfo.getReturnType()))
+ calleeFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect &&
+ !hasScalarEvaluationKind(calleeFnInfo->getReturnType()))
returnSlot =
ReturnValueSlot(ReturnValue, resultType.isVolatileQualified(),
/*IsUnused=*/false, /*IsExternallyDestructed=*/true);
@@ -2954,7 +2941,7 @@ void CodeGenFunction::EmitForwardingCallToLambda(
// Now emit our call.
auto callee = CGCallee::forDirect(calleePtr, GlobalDecl(callOperator));
- RValue RV = EmitCall(calleeFnInfo, callee, returnSlot, callArgs);
+ RValue RV = EmitCall(*calleeFnInfo, callee, returnSlot, callArgs);
// If necessary, copy the returned value into the slot.
if (!resultType->isVoidType() && returnSlot.isNull()) {
@@ -2996,7 +2983,15 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() {
EmitForwardingCallToLambda(CallOp, CallArgs);
}
-void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) {
+void CodeGenFunction::EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD) {
+ if (MD->isVariadic()) {
+ // FIXME: Making this work correctly is nasty because it requires either
+ // cloning the body of the call operator or making the call operator
+ // forward.
+ CGM.ErrorUnsupported(MD, "lambda conversion to variadic function");
+ return;
+ }
+
const CXXRecordDecl *Lambda = MD->getParent();
// Start building arguments for forwarding call
@@ -3007,10 +3002,16 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) {
Address ThisPtr = CreateMemTemp(LambdaType, "unused.capture");
CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType);
- // Add the rest of the parameters.
+ EmitLambdaDelegatingInvokeBody(MD, CallArgs);
+}
+
+void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD,
+ CallArgList &CallArgs) {
+ // Add the rest of the forwarded parameters.
for (auto *Param : MD->parameters())
EmitDelegateCallArg(CallArgs, Param, Param->getBeginLoc());
+ const CXXRecordDecl *Lambda = MD->getParent();
const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator();
// For a generic lambda, find the corresponding call operator specialization
// to which the call to the static-invoker shall be forwarded.
@@ -3024,10 +3025,21 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) {
assert(CorrespondingCallOpSpecialization);
CallOp = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
}
+
+ // Special lambda forwarding when there are inalloca parameters.
+ if (hasInAllocaArg(MD)) {
+ const CGFunctionInfo *ImplFnInfo = nullptr;
+ llvm::Function *ImplFn = nullptr;
+ EmitLambdaInAllocaImplFn(CallOp, &ImplFnInfo, &ImplFn);
+
+ EmitForwardingCallToLambda(CallOp, CallArgs, ImplFnInfo, ImplFn);
+ return;
+ }
+
EmitForwardingCallToLambda(CallOp, CallArgs);
}
-void CodeGenFunction::EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD) {
+void CodeGenFunction::EmitLambdaInAllocaCallOpBody(const CXXMethodDecl *MD) {
if (MD->isVariadic()) {
// FIXME: Making this work correctly is nasty because it requires either
// cloning the body of the call operator or making the call operator forward.
@@ -3035,5 +3047,56 @@ void CodeGenFunction::EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD) {
return;
}
- EmitLambdaDelegatingInvokeBody(MD);
+ // Forward %this argument.
+ CallArgList CallArgs;
+ QualType LambdaType = getContext().getRecordType(MD->getParent());
+ QualType ThisType = getContext().getPointerType(LambdaType);
+ llvm::Value *ThisArg = CurFn->getArg(0);
+ CallArgs.add(RValue::get(ThisArg), ThisType);
+
+ EmitLambdaDelegatingInvokeBody(MD, CallArgs);
+}
+
+void CodeGenFunction::EmitLambdaInAllocaImplFn(
+ const CXXMethodDecl *CallOp, const CGFunctionInfo **ImplFnInfo,
+ llvm::Function **ImplFn) {
+ const CGFunctionInfo &FnInfo =
+ CGM.getTypes().arrangeCXXMethodDeclaration(CallOp);
+ llvm::Function *CallOpFn =
+ cast<llvm::Function>(CGM.GetAddrOfFunction(GlobalDecl(CallOp)));
+
+ // Emit a function containing the original call operator's body. __invoke
+ // will delegate to this function.
+ SmallVector<CanQualType, 4> ArgTypes;
+ for (auto I = FnInfo.arg_begin(); I != FnInfo.arg_end(); ++I)
+ ArgTypes.push_back(I->type);
+ *ImplFnInfo = &CGM.getTypes().arrangeLLVMFunctionInfo(
+ FnInfo.getReturnType(), FnInfoOpts::IsDelegateCall, ArgTypes,
+ FnInfo.getExtInfo(), {}, FnInfo.getRequiredArgs());
+
+ // Create mangled name as if this was a method named __impl. If for some
+ // reason the name doesn't look as expected then just tack __impl to the
+ // front.
+ // TODO: Use the name mangler to produce the right name instead of using
+ // string replacement.
+ StringRef CallOpName = CallOpFn->getName();
+ std::string ImplName;
+ if (size_t Pos = CallOpName.find_first_of("<lambda"))
+ ImplName = ("?__impl@" + CallOpName.drop_front(Pos)).str();
+ else
+ ImplName = ("__impl" + CallOpName).str();
+
+ llvm::Function *Fn = CallOpFn->getParent()->getFunction(ImplName);
+ if (!Fn) {
+ Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(**ImplFnInfo),
+ llvm::GlobalValue::InternalLinkage, ImplName,
+ CGM.getModule());
+ CGM.SetInternalFunctionAttributes(CallOp, Fn, **ImplFnInfo);
+
+ const GlobalDecl &GD = GlobalDecl(CallOp);
+ const auto *D = cast<FunctionDecl>(GD.getDecl());
+ CodeGenFunction(CGM).GenerateCode(GD, Fn, **ImplFnInfo);
+ CGM.SetLLVMFunctionAttributesForDefinition(D, Fn);
+ }
+ *ImplFn = Fn;
}
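
The configuration this path exists for, as a hedged sketch (32-bit MSVC, where non-trivial by-value arguments are passed inalloca):

    struct NonTrivial {
      NonTrivial(const NonTrivial &); // forces inalloca on i686-windows-msvc
    };
    using Fn = void (*)(NonTrivial);
    Fn f = [](NonTrivial) {}; // __invoke now forwards to a synthesized
                              // '?__impl@...' clone of the call operator that
                              // shares the inalloca argument block
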
diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp
index 0bbab283603d..f87caf050eea 100644
--- a/clang/lib/CodeGen/CGCleanup.cpp
+++ b/clang/lib/CodeGen/CGCleanup.cpp
@@ -207,8 +207,13 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) {
Scope->setLifetimeMarker();
// With Windows -EHa, Invoke llvm.seh.scope.begin() for EHCleanup
+ // If exceptions are disabled/ignored and SEH is not in use, then there is no
+ // invoke destination. SEH "works" even if exceptions are off. In practice,
+ // this means that C++ destructors and other EH cleanups don't run, which is
+ // consistent with MSVC's behavior, except in the presence of -EHa.
+ // Check getInvokeDest() to generate llvm.seh.scope.begin() as needed.
if (CGF->getLangOpts().EHAsynch && IsEHCleanup && !IsLifetimeMarker &&
- CGF->getTarget().getCXXABI().isMicrosoft())
+ CGF->getTarget().getCXXABI().isMicrosoft() && CGF->getInvokeDest())
CGF->EmitSehCppScopeBegin();
return Scope->getCleanupBuffer();
@@ -868,8 +873,13 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) {
// If there's exactly one branch-after and no other threads,
// we can route it without a switch.
+ // Skip this for SEH, since the ExitSwitch is used to generate code that
+ // indicates abnormal termination. (In SEH, every exit from a _try other
+ // than _leave or fall-through at the end, i.e. return/goto/continue/break,
+ // is considered an abnormal termination, and NormalCleanupDestSlot is used
+ // to indicate it.)
if (!Scope.hasBranchThroughs() && !HasFixups && !HasFallthrough &&
- Scope.getNumBranchAfters() == 1) {
+ !currentFunctionUsesSEHTry() && Scope.getNumBranchAfters() == 1) {
assert(!BranchThroughDest || !IsActive);
// Clean up the possibly dead store to the cleanup dest slot.
diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index 8437cda79beb..888d30bfb3e1 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -129,14 +129,48 @@ static SmallString<32> buildSuspendPrefixStr(CGCoroData &Coro, AwaitKind Kind) {
return Prefix;
}
-static bool memberCallExpressionCanThrow(const Expr *E) {
- if (const auto *CE = dyn_cast<CXXMemberCallExpr>(E))
- if (const auto *Proto =
- CE->getMethodDecl()->getType()->getAs<FunctionProtoType>())
- if (isNoexceptExceptionSpec(Proto->getExceptionSpecType()) &&
- Proto->canThrow() == CT_Cannot)
- return false;
- return true;
+// Check whether a function can throw based on the prototype's noexcept. This
+// also works for destructors, which are implicitly noexcept but can be marked
+// noexcept(false).
+static bool FunctionCanThrow(const FunctionDecl *D) {
+ const auto *Proto = D->getType()->getAs<FunctionProtoType>();
+ if (!Proto) {
+ // The function prototype is not found; conservatively assume throwing.
+ return true;
+ }
+ return !isNoexceptExceptionSpec(Proto->getExceptionSpecType()) ||
+ Proto->canThrow() != CT_Cannot;
+}
+
+static bool ResumeStmtCanThrow(const Stmt *S) {
+ if (const auto *CE = dyn_cast<CallExpr>(S)) {
+ const auto *Callee = CE->getDirectCallee();
+ if (!Callee)
+ // We don't have a direct callee. Conservatively assume throwing.
+ return true;
+
+ if (FunctionCanThrow(Callee))
+ return true;
+
+ // Fall through to visit the children.
+ }
+
+ if (const auto *TE = dyn_cast<CXXBindTemporaryExpr>(S)) {
+ // CXXBindTemporaryExpr needs special handling here, as the call to the
+ // temporary's destructor is not part of `children()` and so is not covered
+ // by the fall-through below. We need to mark the entire statement as
+ // throwing if the destructor of the temporary can throw.
+ const auto *Dtor = TE->getTemporary()->getDestructor();
+ if (FunctionCanThrow(Dtor))
+ return true;
+
+ // Fall through to visit the children.
+ }
+
+ for (const auto *child : S->children())
+ if (ResumeStmtCanThrow(child))
+ return true;
+
+ return false;
}
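
An example of what the recursive walk now catches, as a hedged sketch (the awaiter shape is illustrative):

    #include <coroutine>
    struct MayThrowOnDestroy {
      ~MayThrowOnDestroy() noexcept(false);
    };
    struct InitAwaiter {
      bool await_ready() noexcept;
      void await_suspend(std::coroutine_handle<>) noexcept;
      // The call itself is noexcept, but destroying the returned temporary
      // can throw, so the resume expression still needs the try/catch.
      MayThrowOnDestroy await_resume() noexcept;
    };
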
// Emit suspend expression which roughly looks like:
@@ -201,6 +235,7 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
CGF.CurCoro.InSuspendBlock = true;
auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr());
CGF.CurCoro.InSuspendBlock = false;
+
if (SuspendRet != nullptr && SuspendRet->getType()->isIntegerTy(1)) {
// Veto suspension if requested by bool returning await_suspend.
BasicBlock *RealSuspendBlock =
@@ -232,7 +267,7 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
// is marked as 'noexcept', we avoid generating this additional IR.
CXXTryStmt *TryStmt = nullptr;
if (Coro.ExceptionHandler && Kind == AwaitKind::Init &&
- memberCallExpressionCanThrow(S.getResumeExpr())) {
+ ResumeStmtCanThrow(S.getResumeExpr())) {
Coro.ResumeEHVar =
CGF.CreateTempAlloca(Builder.getInt1Ty(), Prefix + Twine("resume.eh"));
Builder.CreateFlagStore(true, Coro.ResumeEHVar);
@@ -244,6 +279,15 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
FPOptionsOverride(), Loc, Loc);
TryStmt = CXXTryStmt::Create(CGF.getContext(), Loc, TryBody, Catch);
CGF.EnterCXXTryStmt(*TryStmt);
+ CGF.EmitStmt(TryBody);
+ // We don't use EmitCXXTryStmt here because we need to store to ResumeEHVar,
+ // which doesn't exist in the body.
+ Builder.CreateFlagStore(false, Coro.ResumeEHVar);
+ CGF.ExitCXXTryStmt(*TryStmt);
+ LValueOrRValue Res;
+ // We are not supposed to obtain the value from the initial suspend's
+ // await_resume().
+ Res.RV = RValue::getIgnored();
+ return Res;
}
LValueOrRValue Res;
@@ -252,11 +296,6 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
else
Res.RV = CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult);
- if (TryStmt) {
- Builder.CreateFlagStore(false, Coro.ResumeEHVar);
- CGF.ExitCXXTryStmt(*TryStmt);
- }
-
return Res;
}
@@ -402,8 +441,11 @@ struct CallCoroEnd final : public EHScopeStack::Cleanup {
llvm::Function *CoroEndFn = CGM.getIntrinsic(llvm::Intrinsic::coro_end);
// See if we have a funclet bundle to associate coro.end with. (WinEH)
auto Bundles = getBundlesForCoroEnd(CGF);
- auto *CoroEnd = CGF.Builder.CreateCall(
- CoroEndFn, {NullPtr, CGF.Builder.getTrue()}, Bundles);
+ auto *CoroEnd =
+ CGF.Builder.CreateCall(CoroEndFn,
+ {NullPtr, CGF.Builder.getTrue(),
+ llvm::ConstantTokenNone::get(CoroEndFn->getContext())},
+ Bundles);
if (Bundles.empty()) {
// Otherwise, (landingpad model), create a conditional branch that leads
// either to a cleanup block or a block with EH resume instruction.
@@ -531,6 +573,11 @@ struct GetReturnObjectManager {
Builder.CreateStore(Builder.getFalse(), GroActiveFlag);
GroEmission = CGF.EmitAutoVarAlloca(*GroVarDecl);
+ auto *GroAlloca = dyn_cast_or_null<llvm::AllocaInst>(
+ GroEmission.getOriginalAllocatedAddress().getPointer());
+ assert(GroAlloca && "expected alloca to be emitted");
+ GroAlloca->setMetadata(llvm::LLVMContext::MD_coro_outside_frame,
+ llvm::MDNode::get(CGF.CGM.getLLVMContext(), {}));
// Remember the top of EHStack before emitting the cleanup.
auto old_top = CGF.EHStack.stable_begin();
@@ -594,7 +641,7 @@ static void emitBodyAndFallthrough(CodeGenFunction &CGF,
}
void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
- auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());
+ auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getPtrTy());
auto &TI = CGM.getContext().getTargetInfo();
unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth();
@@ -754,7 +801,9 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
// Emit coro.end before getReturnStmt (and parameter destructors), since
// resume and destroy parts of the coroutine should not include them.
llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end);
- Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()});
+ Builder.CreateCall(CoroEnd,
+ {NullPtr, Builder.getFalse(),
+ llvm::ConstantTokenNone::get(CoroEnd->getContext())});
if (Stmt *Ret = S.getReturnStmt()) {
// Since we already emitted the return value above, so we shouldn't
@@ -766,6 +815,10 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
// LLVM require the frontend to mark the coroutine.
CurFn->setPresplitCoroutine();
+
+ if (CXXRecordDecl *RD = FnRetTy->getAsCXXRecordDecl();
+ RD && RD->hasAttr<CoroOnlyDestroyWhenCompleteAttr>())
+ CurFn->setCoroDestroyOnlyWhenComplete();
}
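
A hedged sketch of the opt-in on a coroutine return type (the promise machinery is omitted):

    struct [[clang::coro_only_destroy_when_complete]] Task { /* ... */ };
    Task f() { co_return; } // CurFn gets setCoroDestroyOnlyWhenComplete()
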
// Emit coroutine intrinsic and patch up arguments of the token type.
@@ -783,7 +836,7 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
}
CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_begin "
"has been used earlier in this function");
- auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());
+ auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getPtrTy());
return RValue::get(NullPtr);
}
case llvm::Intrinsic::coro_size: {
@@ -823,6 +876,10 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
}
for (const Expr *Arg : E->arguments())
Args.push_back(EmitScalarExpr(Arg));
+ // @llvm.coro.end takes a token parameter. Add token 'none' as the last
+ // argument.
+ if (IID == llvm::Intrinsic::coro_end)
+ Args.push_back(llvm::ConstantTokenNone::get(getLLVMContext()));
llvm::Function *F = CGM.getIntrinsic(IID);
llvm::CallInst *Call = Builder.CreateCall(F, Args);
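
The intrinsic's new shape, as a hedged IR sketch:

    // Previously: call i1 @llvm.coro.end(ptr %hdl, i1 false)
    // Now a token operand is required (ConstantTokenNone prints as 'none'):
    //   call i1 @llvm.coro.end(ptr %hdl, i1 false, token none)
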
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index f049a682cfed..7cf661994a29 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -391,12 +391,14 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
SourceManager &SM = CGM.getContext().getSourceManager();
StringRef FileName;
FileID FID;
+ std::optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
if (Loc.isInvalid()) {
// The DIFile used by the CU is distinct from the main source file. Call
// createFile() below for canonicalization if the source file was specified
// with an absolute path.
FileName = TheCU->getFile()->getFilename();
+ CSInfo = TheCU->getFile()->getChecksum();
} else {
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
FileName = PLoc.getFilename();
@@ -417,13 +419,14 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
return cast<llvm::DIFile>(V);
}
+ // Keep Checksum in a scope that persists past the createFile call.
SmallString<64> Checksum;
-
- std::optional<llvm::DIFile::ChecksumKind> CSKind =
+ if (!CSInfo) {
+ std::optional<llvm::DIFile::ChecksumKind> CSKind =
computeChecksum(FID, Checksum);
- std::optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
- if (CSKind)
- CSInfo.emplace(*CSKind, Checksum);
+ if (CSKind)
+ CSInfo.emplace(*CSKind, Checksum);
+ }
return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc)));
}
@@ -1442,6 +1445,8 @@ static unsigned getDwarfCC(CallingConv CC) {
return llvm::dwarf::DW_CC_LLVM_PreserveAll;
case CC_X86RegCall:
return llvm::dwarf::DW_CC_LLVM_X86RegCall;
+ case CC_M68kRTD:
+ return llvm::dwarf::DW_CC_LLVM_M68kRTD;
}
return 0;
}
@@ -1494,6 +1499,8 @@ CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl,
llvm::DIScope *RecordTy, const RecordDecl *RD) {
StringRef Name = BitFieldDecl->getName();
QualType Ty = BitFieldDecl->getType();
+ if (BitFieldDecl->hasAttr<PreferredTypeAttr>())
+ Ty = BitFieldDecl->getAttr<PreferredTypeAttr>()->getType();
SourceLocation Loc = BitFieldDecl->getLocation();
llvm::DIFile *VUnit = getOrCreateFile(Loc);
llvm::DIType *DebugType = getOrCreateType(Ty, VUnit);
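
A hedged sketch of the attribute being consumed here:

    enum class Color : unsigned { Red, Green, Blue };
    struct Pixel {
      // Debug info now describes this bit-field as Color rather than unsigned.
      [[clang::preferred_type(Color)]] unsigned color : 2;
    };
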
@@ -1650,8 +1657,10 @@ void CGDebugInfo::CollectRecordLambdaFields(
FieldDecl *f = *Field;
llvm::DIFile *VUnit = getOrCreateFile(f->getLocation());
QualType type = f->getType();
+ StringRef ThisName =
+ CGM.getCodeGenOpts().EmitCodeView ? "__this" : "this";
llvm::DIType *fieldType = createFieldType(
- "this", type, f->getLocation(), f->getAccess(),
+ ThisName, type, f->getLocation(), f->getAccess(),
layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl);
elements.push_back(fieldType);
@@ -1670,6 +1679,9 @@ CGDebugInfo::CreateRecordStaticField(const VarDecl *Var, llvm::DIType *RecordTy,
unsigned LineNumber = getLineNumber(Var->getLocation());
StringRef VName = Var->getName();
+
+ // FIXME: to avoid complications with type merging we should
+ // emit the constant on the definition instead of the declaration.
llvm::Constant *C = nullptr;
if (Var->getInit()) {
const APValue *Value = Var->evaluateValue();
@@ -1682,9 +1694,12 @@ CGDebugInfo::CreateRecordStaticField(const VarDecl *Var, llvm::DIType *RecordTy,
}
llvm::DINode::DIFlags Flags = getAccessFlag(Var->getAccess(), RD);
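+ // DWARF v5 represents static data members with DW_TAG_variable; earlier
+ // versions used DW_TAG_member.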
+ auto Tag = CGM.getCodeGenOpts().DwarfVersion >= 5
+ ? llvm::dwarf::DW_TAG_variable
+ : llvm::dwarf::DW_TAG_member;
auto Align = getDeclAlignIfRequired(Var, CGM.getContext());
llvm::DIDerivedType *GV = DBuilder.createStaticMemberType(
- RecordTy, VName, VUnit, LineNumber, VTy, Flags, C, Align);
+ RecordTy, VName, VUnit, LineNumber, VTy, Flags, C, Tag, Align);
StaticDataMemberCache[Var->getCanonicalDecl()].reset(GV);
return GV;
}
@@ -2127,14 +2142,14 @@ CGDebugInfo::CollectTemplateParams(std::optional<TemplateArgs> OArgs,
// attribute, i.e. that value is not available at the host side.
if (!CGM.getLangOpts().CUDA || CGM.getLangOpts().CUDAIsDevice ||
!D->hasAttr<CUDADeviceAttr>()) {
- const CXXMethodDecl *MD;
// Variable pointer template parameters have a value that is the address
// of the variable.
if (const auto *VD = dyn_cast<VarDecl>(D))
V = CGM.GetAddrOfGlobalVar(VD);
// Member function pointers have special support for building them,
// though this is currently unsupported in LLVM CodeGen.
- else if ((MD = dyn_cast<CXXMethodDecl>(D)) && MD->isInstance())
+ else if (const auto *MD = dyn_cast<CXXMethodDecl>(D);
+ MD && MD->isImplicitObjectMemberFunction())
V = CGM.getCXXABI().EmitMemberFunctionPointer(MD);
else if (const auto *FD = dyn_cast<FunctionDecl>(D))
V = CGM.GetAddrOfFunction(FD);
@@ -3114,8 +3129,8 @@ llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty,
uint64_t NumVectorBytes = Size / Ctx.getCharWidth();
// Construct the vector of 'char' type.
- QualType CharVecTy = Ctx.getVectorType(Ctx.CharTy, NumVectorBytes,
- VectorType::GenericVector);
+ QualType CharVecTy =
+ Ctx.getVectorType(Ctx.CharTy, NumVectorBytes, VectorKind::Generic);
return CreateType(CharVecTy->getAs<VectorType>(), Unit);
}
@@ -3378,9 +3393,9 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
unsigned Line = getLineNumber(ED->getLocation());
llvm::DIScope *EnumContext = getDeclContextDescriptor(ED);
llvm::DIType *ClassTy = getOrCreateType(ED->getIntegerType(), DefUnit);
- return DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit,
- Line, Size, Align, EltArray, ClassTy,
- Identifier, ED->isScoped());
+ return DBuilder.createEnumerationType(
+ EnumContext, ED->getName(), DefUnit, Line, Size, Align, EltArray, ClassTy,
+ /*RunTimeLang=*/0, Identifier, ED->isScoped());
}
llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent,
@@ -3869,7 +3884,7 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
QualType ET = CGM.getContext().getAsArrayType(T)->getElementType();
T = CGM.getContext().getConstantArrayType(ET, ConstVal, nullptr,
- ArrayType::Normal, 0);
+ ArraySizeModifier::Normal, 0);
}
Name = VD->getName();
@@ -4541,7 +4556,7 @@ CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
if (NumPaddingBytes.isPositive()) {
llvm::APInt pad(32, NumPaddingBytes.getQuantity());
FType = CGM.getContext().getConstantArrayType(
- CGM.getContext().CharTy, pad, nullptr, ArrayType::Normal, 0);
+ CGM.getContext().CharTy, pad, nullptr, ArraySizeModifier::Normal, 0);
EltTys.push_back(CreateMemberType(Unit, FType, "", &FieldOffset));
}
}
@@ -4612,8 +4627,8 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
// If this is implicit parameter of CXXThis or ObjCSelf kind, then give it an
// object pointer flag.
if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD)) {
- if (IPD->getParameterKind() == ImplicitParamDecl::CXXThis ||
- IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf)
+ if (IPD->getParameterKind() == ImplicitParamKind::CXXThis ||
+ IPD->getParameterKind() == ImplicitParamKind::ObjCSelf)
Flags |= llvm::DINode::FlagObjectPointer;
}
@@ -4744,6 +4759,40 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
return D;
}
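+
+// Illustrative sketch of the bitfield case handled below: for
+//   struct S { unsigned U : 16; };
+//   auto [B] = S{};
+// the binding 'B' gets a 16-bit integer debug type (e.g. 'unsigned short')
+// rather than 'unsigned', matching the bitfield's width.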
+llvm::DIType *CGDebugInfo::CreateBindingDeclType(const BindingDecl *BD) {
+ llvm::DIFile *Unit = getOrCreateFile(BD->getLocation());
+
+ // If the declaration is bound to a bitfield struct field, its type may have a
+ // size that is different from its deduced declaration type's.
+ if (const MemberExpr *ME = dyn_cast<MemberExpr>(BD->getBinding())) {
+ if (const FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
+ if (FD->isBitField()) {
+ ASTContext &Context = CGM.getContext();
+ const CGRecordLayout &RL =
+ CGM.getTypes().getCGRecordLayout(FD->getParent());
+ const CGBitFieldInfo &Info = RL.getBitFieldInfo(FD);
+
+ // Find an integer type with the same bitwidth as the bitfield size. If
+ // no suitable type is present in the target, give up on producing debug
+ // information as it would be wrong. It is certainly possible to produce
+ // correct debug info, but the logic isn't currently implemented.
+ uint64_t BitfieldSizeInBits = Info.Size;
+ QualType IntTy =
+ Context.getIntTypeForBitwidth(BitfieldSizeInBits, Info.IsSigned);
+ if (IntTy.isNull())
+ return nullptr;
+ Qualifiers Quals = BD->getType().getQualifiers();
+ QualType FinalTy = Context.getQualifiedType(IntTy, Quals);
+ llvm::DIType *Ty = getOrCreateType(FinalTy, Unit);
+ assert(Ty);
+ return Ty;
+ }
+ }
+ }
+
+ return getOrCreateType(BD->getType(), Unit);
+}
+
llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD,
llvm::Value *Storage,
std::optional<unsigned> ArgNo,
@@ -4758,8 +4807,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD,
if (isa<DeclRefExpr>(BD->getBinding()))
return nullptr;
- llvm::DIFile *Unit = getOrCreateFile(BD->getLocation());
- llvm::DIType *Ty = getOrCreateType(BD->getType(), Unit);
+ llvm::DIType *Ty = CreateBindingDeclType(BD);
// If there is no debug info for this type then do not emit debug info
// for this variable.
@@ -4785,6 +4833,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD,
unsigned Column = getColumnNumber(BD->getLocation());
StringRef Name = BD->getName();
auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back());
+ llvm::DIFile *Unit = getOrCreateFile(BD->getLocation());
// Create the descriptor for the variable.
llvm::DILocalVariable *D = DBuilder.createAutoVariable(
Scope, Name, Unit, Line, Ty, CGM.getLangOpts().Optimize,
@@ -4800,6 +4849,11 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const BindingDecl *BD,
const uint64_t fieldOffset = layout.getFieldOffset(fieldIndex);
if (fieldOffset != 0) {
+ // Currently, if the field offset is not a multiple of a byte, the produced
+ // location would not be accurate. Therefore, give up.
+ if (fieldOffset % CGM.getContext().getCharWidth() != 0)
+ return nullptr;
+
Expr.push_back(llvm::dwarf::DW_OP_plus_uconst);
Expr.push_back(
CGM.getContext().toCharUnitsFromBits(fieldOffset).getQuantity());
@@ -4835,11 +4889,15 @@ CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage,
const bool UsePointerValue) {
assert(CGM.getCodeGenOpts().hasReducedDebugInfo());
- if (auto *DD = dyn_cast<DecompositionDecl>(VD))
+ if (auto *DD = dyn_cast<DecompositionDecl>(VD)) {
for (auto *B : DD->bindings()) {
EmitDeclare(B, Storage, std::nullopt, Builder,
VD->getType()->isReferenceType());
}
+ // Don't emit an llvm.dbg.declare for the composite storage as it doesn't
+ // correspond to a user variable.
+ return nullptr;
+ }
return EmitDeclare(VD, Storage, std::nullopt, Builder, UsePointerValue);
}
@@ -4903,7 +4961,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(
// Self is passed along as an implicit non-arg variable in a
// block. Mark it as the object pointer.
if (const auto *IPD = dyn_cast<ImplicitParamDecl>(VD))
- if (IPD->getParameterKind() == ImplicitParamDecl::ObjCSelf)
+ if (IPD->getParameterKind() == ImplicitParamKind::ObjCSelf)
Ty = CreateSelfType(VD->getType(), Ty);
// Get location information.
@@ -5530,25 +5588,8 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
auto &GV = DeclCache[VD];
if (GV)
return;
- llvm::DIExpression *InitExpr = nullptr;
- if (CGM.getContext().getTypeSize(VD->getType()) <= 64) {
- // FIXME: Add a representation for integer constants wider than 64 bits.
- if (Init.isInt()) {
- const llvm::APSInt &InitInt = Init.getInt();
- std::optional<uint64_t> InitIntOpt;
- if (InitInt.isUnsigned())
- InitIntOpt = InitInt.tryZExtValue();
- else if (auto tmp = InitInt.trySExtValue(); tmp.has_value())
- // Transform a signed optional to unsigned optional. When cpp 23 comes,
- // use std::optional::transform
- InitIntOpt = (uint64_t)tmp.value();
- if (InitIntOpt)
- InitExpr = DBuilder.createConstantValueExpression(InitIntOpt.value());
- } else if (Init.isFloat())
- InitExpr = DBuilder.createConstantValueExpression(
- Init.getFloat().bitcastToAPInt().getZExtValue());
- }
+ llvm::DIExpression *InitExpr = createConstantValueExpression(VD, Init);
llvm::MDTuple *TemplateParameters = nullptr;
if (isa<VarTemplateSpecializationDecl>(VD))
@@ -5885,3 +5926,32 @@ llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const {
return llvm::DINode::FlagAllCallsDescribed;
}
+
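+// Illustrative sketch: for a constant like 'static const int X = 42;' this
+// produces roughly
+//   !DIExpression(DW_OP_constu, 42, DW_OP_stack_value)
+// while constants wider than 64 bits yield nullptr and get no location.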
+llvm::DIExpression *
+CGDebugInfo::createConstantValueExpression(const clang::ValueDecl *VD,
+ const APValue &Val) {
+ // FIXME: Add a representation for integer constants wider than 64 bits.
+ if (CGM.getContext().getTypeSize(VD->getType()) > 64)
+ return nullptr;
+
+ if (Val.isFloat())
+ return DBuilder.createConstantValueExpression(
+ Val.getFloat().bitcastToAPInt().getZExtValue());
+
+ if (!Val.isInt())
+ return nullptr;
+
+ llvm::APSInt const &ValInt = Val.getInt();
+ std::optional<uint64_t> ValIntOpt;
+ if (ValInt.isUnsigned())
+ ValIntOpt = ValInt.tryZExtValue();
+ else if (auto tmp = ValInt.trySExtValue())
+ // Transform a signed optional into an unsigned optional. Once C++23 is
+ // available, use std::optional::transform.
+ ValIntOpt = static_cast<uint64_t>(*tmp);
+
+ if (ValIntOpt)
+ return DBuilder.createConstantValueExpression(ValIntOpt.value());
+
+ return nullptr;
+}
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 1fd08626358b..7b60e94555d0 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -148,7 +148,7 @@ class CGDebugInfo {
llvm::BumpPtrAllocator DebugInfoNames;
StringRef CWDName;
- llvm::StringMap<llvm::TrackingMDRef> DIFileCache;
+ llvm::DenseMap<const char *, llvm::TrackingMDRef> DIFileCache;
llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> SPCache;
/// Cache declarations relevant to DW_TAG_imported_declarations (C++
/// using declarations and global alias variables) that aren't covered
@@ -337,6 +337,9 @@ class CGDebugInfo {
llvm::DIScope *RecordTy,
const RecordDecl *RD);
+ /// Create type for binding declarations.
+ llvm::DIType *CreateBindingDeclType(const BindingDecl *BD);
+
/// Create an anonymous zero-size separator for a bit-field decl if needed on
/// the target.
llvm::DIDerivedType *createBitFieldSeparatorIfNeeded(
@@ -797,6 +800,11 @@ private:
llvm::MDTuple *&TemplateParameters,
llvm::DIScope *&VDContext);
+ /// Create a DIExpression representing the constant corresponding
+ /// to the specified 'Val'. Returns nullptr on failure.
+ llvm::DIExpression *createConstantValueExpression(const clang::ValueDecl *VD,
+ const APValue &Val);
+
/// Allocate a copy of \p A using the DebugInfoNames allocator
/// and return a reference to it. If multiple arguments are given the strings
/// are concatenated.
@@ -832,8 +840,10 @@ public:
// Define copy assignment operator.
ApplyDebugLocation &operator=(ApplyDebugLocation &&Other) {
- CGF = Other.CGF;
- Other.CGF = nullptr;
+ if (this != &Other) {
+ CGF = Other.CGF;
+ Other.CGF = nullptr;
+ }
return *this;
}
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index b0d6eb05acc2..a5da0aa2965a 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -96,7 +96,6 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::FriendTemplate:
case Decl::Block:
case Decl::Captured:
- case Decl::ClassScopeFunctionSpecialization:
case Decl::UsingShadow:
case Decl::ConstructorUsingShadow:
case Decl::ObjCTypeParam:
@@ -202,7 +201,7 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
return;
llvm::GlobalValue::LinkageTypes Linkage =
- CGM.getLLVMLinkageVarDefinition(&D, /*IsConstant=*/false);
+ CGM.getLLVMLinkageVarDefinition(&D);
// FIXME: We need to force the emission/use of a guard variable for
// some variables even if we can constant-evaluate them because
@@ -387,9 +386,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D,
GV->takeName(OldGV);
// Replace all uses of the old global with the new global
- llvm::Constant *NewPtrForOldDecl =
- llvm::ConstantExpr::getBitCast(GV, OldGV->getType());
- OldGV->replaceAllUsesWith(NewPtrForOldDecl);
+ OldGV->replaceAllUsesWith(GV);
// Erase the old global, since it is no longer used.
OldGV->eraseFromParent();
@@ -398,7 +395,8 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D,
bool NeedsDtor =
D.needsDestruction(getContext()) == QualType::DK_cxx_destructor;
- GV->setConstant(CGM.isTypeConstant(D.getType(), true, !NeedsDtor));
+ GV->setConstant(
+ D.getType().isConstantStorage(getContext(), true, !NeedsDtor));
GV->setInitializer(Init);
emitter.finalize(GV);
@@ -579,8 +577,7 @@ namespace {
bool isRedundantBeforeReturn() override { return true; }
void Emit(CodeGenFunction &CGF, Flags flags) override {
llvm::Value *V = CGF.Builder.CreateLoad(Stack);
- llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore);
- CGF.Builder.CreateCall(F, V);
+ CGF.Builder.CreateStackRestore(V);
}
};
@@ -1247,29 +1244,24 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
// If the initializer is small, use a handful of stores.
if (shouldSplitConstantStore(CGM, ConstantSize)) {
if (auto *STy = dyn_cast<llvm::StructType>(Ty)) {
- // FIXME: handle the case when STy != Loc.getElementType().
- if (STy == Loc.getElementType()) {
- for (unsigned i = 0; i != constant->getNumOperands(); i++) {
- Address EltPtr = Builder.CreateStructGEP(Loc, i);
- emitStoresForConstant(
- CGM, D, EltPtr, isVolatile, Builder,
- cast<llvm::Constant>(Builder.CreateExtractValue(constant, i)),
- IsAutoInit);
- }
- return;
+ const llvm::StructLayout *Layout =
+ CGM.getDataLayout().getStructLayout(STy);
+ for (unsigned i = 0; i != constant->getNumOperands(); i++) {
+ CharUnits CurOff = CharUnits::fromQuantity(Layout->getElementOffset(i));
+ Address EltPtr = Builder.CreateConstInBoundsByteGEP(
+ Loc.withElementType(CGM.Int8Ty), CurOff);
+ emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
+ constant->getAggregateElement(i), IsAutoInit);
}
+ return;
} else if (auto *ATy = dyn_cast<llvm::ArrayType>(Ty)) {
- // FIXME: handle the case when ATy != Loc.getElementType().
- if (ATy == Loc.getElementType()) {
- for (unsigned i = 0; i != ATy->getNumElements(); i++) {
- Address EltPtr = Builder.CreateConstArrayGEP(Loc, i);
- emitStoresForConstant(
- CGM, D, EltPtr, isVolatile, Builder,
- cast<llvm::Constant>(Builder.CreateExtractValue(constant, i)),
- IsAutoInit);
- }
- return;
+ for (unsigned i = 0; i != ATy->getNumElements(); i++) {
+ Address EltPtr = Builder.CreateConstGEP(
+ Loc.withElementType(ATy->getElementType()), i);
+ emitStoresForConstant(CGM, D, EltPtr, isVolatile, Builder,
+ constant->getAggregateElement(i), IsAutoInit);
}
+ return;
}
}
@@ -1359,7 +1351,6 @@ llvm::Value *CodeGenFunction::EmitLifetimeStart(llvm::TypeSize Size,
"Pointer should be in alloca address space");
llvm::Value *SizeV = llvm::ConstantInt::get(
Int64Ty, Size.isScalable() ? -1 : Size.getFixedValue());
- Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy);
llvm::CallInst *C =
Builder.CreateCall(CGM.getLLVMLifetimeStartFn(), {SizeV, Addr});
C->setDoesNotThrow();
@@ -1370,7 +1361,6 @@ void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) {
assert(Addr->getType()->getPointerAddressSpace() ==
CGM.getDataLayout().getAllocaAddrSpace() &&
"Pointer should be in alloca address space");
- Addr = Builder.CreateBitCast(Addr, AllocaInt8PtrTy);
llvm::CallInst *C =
Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr});
C->setDoesNotThrow();
@@ -1499,7 +1489,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
if ((!getLangOpts().OpenCL ||
Ty.getAddressSpace() == LangAS::opencl_constant) &&
(CGM.getCodeGenOpts().MergeAllConstants && !NRVO &&
- !isEscapingByRef && CGM.isTypeConstant(Ty, true, !NeedsDtor))) {
+ !isEscapingByRef &&
+ Ty.isConstantStorage(getContext(), true, !NeedsDtor))) {
EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage);
// Signal this condition to later callbacks.
@@ -1533,8 +1524,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// applied.
llvm::Value *Zero = Builder.getFalse();
Address NRVOFlag =
- CreateTempAlloca(Zero->getType(), CharUnits::One(), "nrvo",
- /*ArraySize=*/nullptr, &AllocaAddr);
+ CreateTempAlloca(Zero->getType(), CharUnits::One(), "nrvo");
EnsureInsertPoint();
Builder.CreateStore(Zero, NRVOFlag);
@@ -1629,10 +1619,10 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
if (!DidCallStackSave) {
// Save the stack.
Address Stack =
- CreateTempAlloca(Int8PtrTy, getPointerAlign(), "saved_stack");
+ CreateDefaultAlignTempAlloca(AllocaInt8PtrTy, "saved_stack");
- llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::stacksave);
- llvm::Value *V = Builder.CreateCall(F);
+ llvm::Value *V = Builder.CreateStackSave();
+ assert(V->getType() == AllocaInt8PtrTy);
Builder.CreateStore(V, Stack);
DidCallStackSave = true;
@@ -2523,7 +2513,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
// Suppressing debug info for ThreadPrivateVar parameters, else it hides
// debug info of TLS variables.
NoDebugInfo =
- (IPD->getParameterKind() == ImplicitParamDecl::ThreadPrivateVar);
+ (IPD->getParameterKind() == ImplicitParamKind::ThreadPrivateVar);
}
Address DeclPtr = Address::invalid();
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index be8fb6c274db..e08a1e5f42df 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -126,7 +126,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
CGM.getLLVMContext(), CGM.getContext().getTargetAddressSpace(DestAS));
auto SrcAS = D.getType().getQualifiers().getAddressSpace();
if (DestAS == SrcAS)
- Argument = llvm::ConstantExpr::getBitCast(Addr.getPointer(), DestTy);
+ Argument = Addr.getPointer();
else
// FIXME: On addr space mismatch we are passing NULL. The generation
// of the global destructor function should be adjusted accordingly.
@@ -167,8 +167,7 @@ void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) {
// Emit a call with the size in bytes of the object.
uint64_t Width = Size.getQuantity();
- llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(Int64Ty, Width),
- llvm::ConstantExpr::getBitCast(Addr, Int8PtrTy)};
+ llvm::Value *Args[2] = {llvm::ConstantInt::getSigned(Int64Ty, Width), Addr};
Builder.CreateCall(InvariantStart, Args);
}
@@ -217,7 +216,7 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D,
D.needsDestruction(getContext()) == QualType::DK_cxx_destructor;
if (PerformInit)
EmitDeclInit(*this, D, DeclAddr);
- if (CGM.isTypeConstant(D.getType(), true, !NeedsDtor))
+ if (D.getType().isConstantStorage(getContext(), true, !NeedsDtor))
EmitDeclInvariant(*this, D, DeclPtr);
else
EmitDeclDestroy(*this, D, DeclAddr);
@@ -279,8 +278,8 @@ llvm::Function *CodeGenFunction::createTLSAtExitStub(
}
const CGFunctionInfo &FI = CGM.getTypes().arrangeLLVMFunctionInfo(
- getContext().IntTy, /*instanceMethod=*/false, /*chainCall=*/false,
- {getContext().IntTy}, FunctionType::ExtInfo(), {}, RequiredArgs::All);
+ getContext().IntTy, FnInfoOpts::None, {getContext().IntTy},
+ FunctionType::ExtInfo(), {}, RequiredArgs::All);
// Get the stub function type, int(*)(int,...).
llvm::FunctionType *StubTy =
@@ -293,7 +292,7 @@ llvm::Function *CodeGenFunction::createTLSAtExitStub(
FunctionArgList Args;
ImplicitParamDecl IPD(CGM.getContext(), CGM.getContext().IntTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.push_back(&IPD);
QualType ResTy = CGM.getContext().IntTy;
@@ -328,6 +327,15 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD,
registerGlobalDtorWithAtExit(dtorStub);
}
+/// Register a global destructor using the LLVM 'llvm.global_dtors' global.
+void CodeGenFunction::registerGlobalDtorWithLLVM(const VarDecl &VD,
+ llvm::FunctionCallee Dtor,
+ llvm::Constant *Addr) {
+ // Create a function which calls the destructor.
+ llvm::Function *dtorStub = createAtExitStub(VD, Dtor, Addr);
+ CGM.AddGlobalDtor(dtorStub);
+}
+
void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) {
// extern "C" int atexit(void (*f)(void));
assert(dtorStub->getType() ==
@@ -520,10 +528,6 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
D->hasAttr<CUDASharedAttr>()))
return;
- if (getLangOpts().OpenMP &&
- getOpenMPRuntime().emitDeclareTargetVarDefinition(D, Addr, PerformInit))
- return;
-
// Check if we've already initialized this decl.
auto I = DelayedCXXInitPosition.find(D);
if (I != DelayedCXXInitPosition.end() && I->second == ~0U)
@@ -655,6 +659,10 @@ void CodeGenModule::EmitCXXThreadLocalInitFunc() {
*/
void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
+ assert(Primary->isInterfaceOrPartition() &&
+ "The function should only be called for a C++20 named module "
+ "interface or partition.");
+
while (!CXXGlobalInits.empty() && !CXXGlobalInits.back())
CXXGlobalInits.pop_back();
@@ -662,19 +670,35 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
// Module initializers for imported modules are emitted first.
// Collect all the modules that we import
- SmallVector<Module *> AllImports;
+ llvm::SmallSetVector<Module *, 8> AllImports;
// Ones that we export
for (auto I : Primary->Exports)
- AllImports.push_back(I.getPointer());
+ AllImports.insert(I.getPointer());
// Ones that we only import.
for (Module *M : Primary->Imports)
- AllImports.push_back(M);
+ AllImports.insert(M);
+ // Ones that we import in the global module fragment or the private module
+ // fragment.
+ for (Module *SubM : Primary->submodules()) {
+ assert((SubM->isGlobalModule() || SubM->isPrivateModule()) &&
+ "The submodules of a C++20 module unit should only be global module "
+ "fragments or private module fragments.");
+ assert(SubM->Exports.empty() &&
+ "The global module fragments and the private module fragments are "
+ "not allowed to export imported modules.");
+ for (Module *M : SubM->Imports)
+ AllImports.insert(M);
+ }
SmallVector<llvm::Function *, 8> ModuleInits;
for (Module *M : AllImports) {
// No Itanium initializer in header like modules.
if (M->isHeaderLikeModule())
continue; // TODO: warn of mixed use of module map modules and C++20?
+ // We're allowed to skip the initialization if we are sure it doesn't
+ // do anything.
+ if (!M->isNamedModuleInterfaceHasInit())
+ continue;
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
SmallString<256> FnName;
{
@@ -731,8 +755,7 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
// If we have a completely empty initializer then we do not want to create
// the guard variable.
ConstantAddress GuardAddr = ConstantAddress::invalid();
- if (!AllImports.empty() || !PrioritizedCXXGlobalInits.empty() ||
- !CXXGlobalInits.empty()) {
+ if (!ModuleInits.empty()) {
// Create the guard var.
llvm::GlobalVariable *Guard = new llvm::GlobalVariable(
getModule(), Int8Ty, /*isConstant=*/false,
@@ -1120,7 +1143,7 @@ llvm::Function *CodeGenFunction::generateDestroyHelper(
bool useEHCleanupForArray, const VarDecl *VD) {
FunctionArgList args;
ImplicitParamDecl Dst(getContext(), getContext().VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
args.push_back(&Dst);
const CGFunctionInfo &FI =
diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp
index 9cb7d4c7731d..bae8babb8efe 100644
--- a/clang/lib/CodeGen/CGException.cpp
+++ b/clang/lib/CodeGen/CGException.cpp
@@ -263,12 +263,7 @@ static llvm::FunctionCallee getPersonalityFn(CodeGenModule &CGM,
static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM,
const EHPersonality &Personality) {
llvm::FunctionCallee Fn = getPersonalityFn(CGM, Personality);
- llvm::PointerType* Int8PtrTy = llvm::PointerType::get(
- llvm::Type::getInt8Ty(CGM.getLLVMContext()),
- CGM.getDataLayout().getProgramAddressSpace());
-
- return llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(Fn.getCallee()),
- Int8PtrTy);
+ return cast<llvm::Constant>(Fn.getCallee());
}
/// Check whether a landingpad instruction only uses C++ features.
@@ -440,6 +435,15 @@ llvm::Value *CodeGenFunction::getSelectorFromSlot() {
void CodeGenFunction::EmitCXXThrowExpr(const CXXThrowExpr *E,
bool KeepInsertionPoint) {
+ // If the exception is being emitted in an OpenMP target region and the
+ // target is a GPU, exception handling is not supported. Therefore, we
+ // emit a trap, which will abort the program, and issue a warning
+ // indicating that a trap will be emitted.
+ const llvm::Triple &T = Target.getTriple();
+ if (CGM.getLangOpts().OpenMPIsTargetDevice && (T.isNVPTX() || T.isAMDGCN())) {
+ EmitTrapCall(llvm::Intrinsic::trap);
+ return;
+ }
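+ // Illustrative sketch: 'throw 1;' inside '#pragma omp target' compiled for
+ // NVPTX or AMDGCN lowers to 'call void @llvm.trap()' instead of EH code.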
if (const Expr *SubExpr = E->getSubExpr()) {
QualType ThrowType = SubExpr->getType();
if (ThrowType->isObjCObjectPointerType()) {
@@ -609,9 +613,16 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) {
}
void CodeGenFunction::EmitCXXTryStmt(const CXXTryStmt &S) {
- EnterCXXTryStmt(S);
+ const llvm::Triple &T = Target.getTriple();
+ // If we encounter a try statement in an OpenMP target region offloaded to
+ // a GPU, we treat it as a basic block.
+ const bool IsTargetDevice =
+ (CGM.getLangOpts().OpenMPIsTargetDevice && (T.isNVPTX() || T.isAMDGCN()));
+ if (!IsTargetDevice)
+ EnterCXXTryStmt(S);
EmitStmt(S.getTryBlock());
- ExitCXXTryStmt(S);
+ if (!IsTargetDevice)
+ ExitCXXTryStmt(S);
}
void CodeGenFunction::EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
@@ -1120,6 +1131,8 @@ static void emitCatchDispatchBlock(CodeGenFunction &CGF,
// Select the right handler.
llvm::Function *llvm_eh_typeid_for =
CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for);
+ llvm::Type *argTy = llvm_eh_typeid_for->getArg(0)->getType();
+ LangAS globAS = CGF.CGM.GetGlobalVarAddressSpace(nullptr);
// Load the selector value.
llvm::Value *selector = CGF.getSelectorFromSlot();
@@ -1133,7 +1146,11 @@ static void emitCatchDispatchBlock(CodeGenFunction &CGF,
assert(handler.Type.Flags == 0 &&
"landingpads do not support catch handler flags");
assert(typeValue && "fell into catch-all case!");
- typeValue = CGF.Builder.CreateBitCast(typeValue, CGF.Int8PtrTy);
+ // With opaque ptrs, only the address space can be a mismatch.
+ if (typeValue->getType() != argTy)
+ typeValue =
+ CGF.getTargetHooks().performAddrSpaceCast(CGF, typeValue, globAS,
+ LangAS::Default, argTy);
// Figure out the next block.
bool nextIsEnd;
@@ -1816,13 +1833,11 @@ Address CodeGenFunction::recoverAddrOfEscapedLocal(CodeGenFunction &ParentCGF,
auto InsertPair = ParentCGF.EscapedLocals.insert(
std::make_pair(ParentAlloca, ParentCGF.EscapedLocals.size()));
int FrameEscapeIdx = InsertPair.first->second;
- // call i8* @llvm.localrecover(i8* bitcast(@parentFn), i8* %fp, i32 N)
+ // call ptr @llvm.localrecover(ptr @parentFn, ptr %fp, i32 N)
llvm::Function *FrameRecoverFn = llvm::Intrinsic::getDeclaration(
&CGM.getModule(), llvm::Intrinsic::localrecover);
- llvm::Constant *ParentI8Fn =
- llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
RecoverCall = Builder.CreateCall(
- FrameRecoverFn, {ParentI8Fn, ParentFP,
+ FrameRecoverFn, {ParentCGF.CurFn, ParentFP,
llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx)});
} else {
@@ -1885,9 +1900,7 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF,
// since finally funclets recover the parent FP for us.
llvm::Function *RecoverFPIntrin =
CGM.getIntrinsic(llvm::Intrinsic::eh_recoverfp);
- llvm::Constant *ParentI8Fn =
- llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
- ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentI8Fn, EntryFP});
+ ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentCGF.CurFn, EntryFP});
// if the parent is a _finally, the passed-in ParentFP is the FP
// of parent _finally, not Establisher's FP (FP of outermost function).
@@ -1915,19 +1928,15 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF,
int FrameEscapeIdx = InsertPair.first->second;
// an example of a filter's prolog::
- // %0 = call i8* @llvm.eh.recoverfp(bitcast(@"?fin$0@0@main@@"),..)
- // %1 = call i8* @llvm.localrecover(bitcast(@"?fin$0@0@main@@"),..)
- // %2 = bitcast i8* %1 to i8**
- // %3 = load i8*, i8* *%2, align 8
- // ==> %3 is the frame-pointer of outermost host function
+ // %0 = call ptr @llvm.eh.recoverfp(@"?fin$0@0@main@@",..)
+ // %1 = call ptr @llvm.localrecover(@"?fin$0@0@main@@",..)
+ // %2 = load ptr, ptr %1, align 8
+ // ==> %2 is the frame-pointer of outermost host function
llvm::Function *FrameRecoverFn = llvm::Intrinsic::getDeclaration(
&CGM.getModule(), llvm::Intrinsic::localrecover);
- llvm::Constant *ParentI8Fn =
- llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
ParentFP = Builder.CreateCall(
- FrameRecoverFn, {ParentI8Fn, ParentFP,
+ FrameRecoverFn, {ParentCGF.CurFn, ParentFP,
llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx)});
- ParentFP = Builder.CreateBitCast(ParentFP, CGM.VoidPtrPtrTy);
ParentFP = Builder.CreateLoad(
Address(ParentFP, CGM.VoidPtrTy, getPointerAlign()));
}
@@ -2019,17 +2028,17 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
Args.push_back(ImplicitParamDecl::Create(
getContext(), /*DC=*/nullptr, StartLoc,
&getContext().Idents.get("exception_pointers"),
- getContext().VoidPtrTy, ImplicitParamDecl::Other));
+ getContext().VoidPtrTy, ImplicitParamKind::Other));
} else {
Args.push_back(ImplicitParamDecl::Create(
getContext(), /*DC=*/nullptr, StartLoc,
&getContext().Idents.get("abnormal_termination"),
- getContext().UnsignedCharTy, ImplicitParamDecl::Other));
+ getContext().UnsignedCharTy, ImplicitParamKind::Other));
}
Args.push_back(ImplicitParamDecl::Create(
getContext(), /*DC=*/nullptr, StartLoc,
&getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy,
- ImplicitParamDecl::Other));
+ ImplicitParamKind::Other));
}
QualType RetTy = IsFilter ? getContext().LongTy : getContext().VoidTy;
@@ -2184,9 +2193,7 @@ void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S) {
// in place of the RTTI typeinfo global that C++ EH uses.
llvm::Function *FilterFunc =
HelperCGF.GenerateSEHFilterFunction(*this, *Except);
- llvm::Constant *OpaqueFunc =
- llvm::ConstantExpr::getBitCast(FilterFunc, Int8PtrTy);
- CatchScope->setHandler(0, OpaqueFunc, createBasicBlock("__except.ret"));
+ CatchScope->setHandler(0, FilterFunc, createBasicBlock("__except.ret"));
}
void CodeGenFunction::ExitSEHTryStmt(const SEHTryStmt &S) {
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index ed6095f7cfeb..69cf7f76be9a 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -30,6 +30,7 @@
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
@@ -51,6 +52,12 @@
using namespace clang;
using namespace CodeGen;
+// An experiment to make sanitizers easier to debug.
+static llvm::cl::opt<bool> ClSanitizeDebugDeoptimization(
+ "ubsan-unique-traps", llvm::cl::Optional,
+ llvm::cl::desc("Deoptimize traps for UBSAN so there is 1 trap per check"),
+ llvm::cl::init(false));
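+// Usage sketch: '-mllvm -ubsan-unique-traps' gives each check its own
+// @llvm.ubsantrap call with a distinct immediate, instead of sharing one
+// merged trap block per check kind, so a crash address maps back to a
+// single check.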
+
//===--------------------------------------------------------------------===//
// Miscellaneous Helper Methods
//===--------------------------------------------------------------------===//
@@ -140,9 +147,8 @@ Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align,
auto *VectorTy = llvm::FixedVectorType::get(ArrayTy->getElementType(),
ArrayTy->getNumElements());
- Result = Address(
- Builder.CreateBitCast(Result.getPointer(), VectorTy->getPointerTo()),
- VectorTy, Result.getAlignment(), KnownNonNull);
+ Result = Address(Result.getPointer(), VectorTy, Result.getAlignment(),
+ KnownNonNull);
}
return Result;
}
@@ -392,7 +398,7 @@ static Address createReferenceTemporary(CodeGenFunction &CGF,
QualType Ty = Inner->getType();
if (CGF.CGM.getCodeGenOpts().MergeAllConstants &&
(Ty->isArrayType() || Ty->isRecordType()) &&
- CGF.CGM.isTypeConstant(Ty, true, false))
+ Ty.isConstantStorage(CGF.getContext(), true, false))
if (auto Init = ConstantEmitter(CGF).tryEmitAbstract(Inner, Ty)) {
auto AS = CGF.CGM.GetGlobalConstantAddressSpace();
auto *GV = new llvm::GlobalVariable(
@@ -444,9 +450,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
Address Object = createReferenceTemporary(*this, M, E);
if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object.getPointer())) {
llvm::Type *Ty = ConvertTypeForMem(E->getType());
- Object = Address(llvm::ConstantExpr::getBitCast(
- Var, Ty->getPointerTo(Object.getAddressSpace())),
- Ty, Object.getAlignment());
+ Object = Object.withElementType(Ty);
// createReferenceTemporary will promote the temporary to a global with a
// constant initializer if it can. It can only do this to a value of
@@ -502,11 +506,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
if (auto *Var = dyn_cast<llvm::GlobalVariable>(
Object.getPointer()->stripPointerCasts())) {
llvm::Type *TemporaryType = ConvertTypeForMem(E->getType());
- Object = Address(llvm::ConstantExpr::getBitCast(
- cast<llvm::Constant>(Object.getPointer()),
- TemporaryType->getPointerTo()),
- TemporaryType,
- Object.getAlignment());
+ Object = Object.withElementType(TemporaryType);
// If the temporary is a global and has a constant initializer or is a
// constant temporary that we promoted to a global, we may have already
// initialized it.
@@ -746,9 +746,8 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
llvm::Value *Min = Builder.getFalse();
llvm::Value *NullIsUnknown = Builder.getFalse();
llvm::Value *Dynamic = Builder.getFalse();
- llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy);
llvm::Value *LargeEnough = Builder.CreateICmpUGE(
- Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown, Dynamic}), Size);
+ Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic}), Size);
Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize));
}
}
@@ -825,9 +824,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
// Load the vptr, and compute hash_16_bytes(TypeHash, vptr).
llvm::Value *Low = llvm::ConstantInt::get(Int64Ty, TypeHash);
- llvm::Type *VPtrTy = llvm::PointerType::get(IntPtrTy, 0);
- Address VPtrAddr(Builder.CreateBitCast(Ptr, VPtrTy), IntPtrTy,
- getPointerAlign());
+ Address VPtrAddr(Ptr, IntPtrTy, getPointerAlign());
llvm::Value *VPtrVal = Builder.CreateLoad(VPtrAddr);
llvm::Value *High = Builder.CreateZExt(VPtrVal, Int64Ty);
@@ -929,16 +926,27 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF,
if (CE->getCastKind() == CK_ArrayToPointerDecay &&
!CE->getSubExpr()->isFlexibleArrayMemberLike(CGF.getContext(),
StrictFlexArraysLevel)) {
+ CodeGenFunction::SanitizerScope SanScope(&CGF);
+
IndexedType = CE->getSubExpr()->getType();
const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe();
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT))
return CGF.Builder.getInt(CAT->getSize());
- else if (const auto *VAT = dyn_cast<VariableArrayType>(AT))
+
+ if (const auto *VAT = dyn_cast<VariableArrayType>(AT))
return CGF.getVLASize(VAT).NumElts;
// Ignore pass_object_size here. It's not applicable on decayed pointers.
}
+
+ if (const ValueDecl *VD = CGF.FindCountedByField(Base)) {
+ IndexedType = Base->getType();
+ const Expr *E = CGF.BuildCountedByFieldExpr(Base, VD);
+ return CGF.EmitAnyExprToTemp(E).getScalarVal();
+ }
}
+ CodeGenFunction::SanitizerScope SanScope(&CGF);
+
QualType EltTy{Base->getType()->getPointeeOrArrayElementType(), 0};
if (llvm::Value *POS = CGF.LoadPassedObjectSize(Base, EltTy)) {
IndexedType = Base->getType();
@@ -948,13 +956,122 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF,
return nullptr;
}
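+
+// Illustrative sketch of the pattern the counted_by helpers below recognize:
+//   struct bounded {
+//     int count;
+//     int fam[] __attribute__((counted_by(count)));
+//   };
+// For an access like 'p->fam[idx]', FindCountedByField returns the 'count'
+// member and BuildCountedByFieldExpr builds the implicit 'p->count' member
+// access whose loaded value is used as the array bound by
+// -fsanitize=array-bounds.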
+const Expr *
+CodeGenFunction::BuildCountedByFieldExpr(const Expr *Base,
+ const ValueDecl *CountedByVD) {
+ // Find the outer struct expr (e.g. 'p' in 'p->a.b.c.d').
+ Expr *CountedByExpr = const_cast<Expr *>(Base)->IgnoreParenImpCasts();
+
+ // Work our way up the expression until we reach the DeclRefExpr.
+ while (!isa<DeclRefExpr>(CountedByExpr))
+ if (const auto *ME = dyn_cast<MemberExpr>(CountedByExpr))
+ CountedByExpr = ME->getBase()->IgnoreParenImpCasts();
+
+ // Add back an implicit cast to create the required prvalue.
+ CountedByExpr = ImplicitCastExpr::Create(
+ getContext(), CountedByExpr->getType(), CK_LValueToRValue, CountedByExpr,
+ nullptr, VK_PRValue, FPOptionsOverride());
+
+ if (const auto *IFD = dyn_cast<IndirectFieldDecl>(CountedByVD)) {
+ // The counted_by field is inside an anonymous struct / union. The
+ // IndirectFieldDecl has the correct order of FieldDecls to build this
+ // easily. (Yay!)
+ for (NamedDecl *ND : IFD->chain()) {
+ auto *VD = cast<ValueDecl>(ND);
+ CountedByExpr =
+ MemberExpr::CreateImplicit(getContext(), CountedByExpr,
+ CountedByExpr->getType()->isPointerType(),
+ VD, VD->getType(), VK_LValue, OK_Ordinary);
+ }
+ } else {
+ CountedByExpr = MemberExpr::CreateImplicit(
+ getContext(), const_cast<Expr *>(CountedByExpr),
+ CountedByExpr->getType()->isPointerType(),
+ const_cast<ValueDecl *>(CountedByVD), CountedByVD->getType(), VK_LValue,
+ OK_Ordinary);
+ }
+
+ return CountedByExpr;
+}
+
+const ValueDecl *
+CodeGenFunction::FindFlexibleArrayMemberField(ASTContext &Ctx,
+ const RecordDecl *RD) {
+ const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
+ getLangOpts().getStrictFlexArraysLevel();
+
+ for (const Decl *D : RD->decls()) {
+ if (const auto *VD = dyn_cast<ValueDecl>(D);
+ VD && Decl::isFlexibleArrayMemberLike(
+ Ctx, VD, VD->getType(), StrictFlexArraysLevel,
+ /*IgnoreTemplateOrMacroSubstitution=*/true))
+ return VD;
+
+ if (const auto *Record = dyn_cast<RecordDecl>(D))
+ if (const ValueDecl *VD = FindFlexibleArrayMemberField(Ctx, Record))
+ return VD;
+ }
+
+ return nullptr;
+}
+
+const ValueDecl *CodeGenFunction::FindCountedByField(const Expr *Base) {
+ ASTContext &Ctx = getContext();
+ const RecordDecl *OuterRD = nullptr;
+ const FieldDecl *FD = nullptr;
+
+ Base = Base->IgnoreParenImpCasts();
+
+ // Get the outer-most lexical RecordDecl.
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
+ QualType Ty = DRE->getDecl()->getType();
+ if (Ty->isPointerType())
+ Ty = Ty->getPointeeType();
+
+ if (const auto *RD = Ty->getAsRecordDecl())
+ OuterRD = RD->getOuterLexicalRecordContext();
+ } else if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
+ if (const ValueDecl *MD = ME->getMemberDecl()) {
+ OuterRD = MD->getDeclContext()->getOuterLexicalRecordContext();
+
+ const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
+ getLangOpts().getStrictFlexArraysLevel();
+ if (Decl::isFlexibleArrayMemberLike(
+ Ctx, MD, MD->getType(), StrictFlexArraysLevel,
+ /*IgnoreTemplateOrMacroSubstitution=*/true))
+ // Base is referencing the FAM itself.
+ FD = dyn_cast<FieldDecl>(MD);
+ }
+ }
+
+ if (!OuterRD)
+ return nullptr;
+
+ if (!FD) {
+ const ValueDecl *VD = FindFlexibleArrayMemberField(Ctx, OuterRD);
+ FD = dyn_cast_if_present<FieldDecl>(VD);
+ if (!FD)
+ return nullptr;
+ }
+
+ const auto *CBA = FD->getAttr<CountedByAttr>();
+ if (!CBA)
+ return nullptr;
+
+ DeclarationName DName(CBA->getCountedByField());
+ DeclContext::lookup_result Lookup = OuterRD->lookup(DName);
+
+ if (Lookup.empty())
+ return nullptr;
+
+ return dyn_cast<ValueDecl>(Lookup.front());
+}
+
void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base,
llvm::Value *Index, QualType IndexType,
bool Accessed) {
assert(SanOpts.has(SanitizerKind::ArrayBounds) &&
"should not be called unless adding bounds checks");
- SanitizerScope SanScope(this);
-
const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
getLangOpts().getStrictFlexArraysLevel();
@@ -964,6 +1081,8 @@ void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base,
if (!Bound)
return;
+ SanitizerScope SanScope(this);
+
bool IndexSigned = IndexType->isSignedIntegerOrEnumerationType();
llvm::Value *IndexVal = Builder.CreateIntCast(Index, SizeTy, IndexSigned);
llvm::Value *BoundVal = Builder.CreateIntCast(Bound, SizeTy, false);
@@ -1216,7 +1335,7 @@ LValue CodeGenFunction::EmitUnsupportedLValue(const Expr *E,
const char *Name) {
ErrorUnsupported(E, Name);
llvm::Type *ElTy = ConvertType(E->getType());
- llvm::Type *Ty = llvm::PointerType::getUnqual(ElTy);
+ llvm::Type *Ty = UnqualPtrTy;
return MakeAddrLValue(
Address(llvm::UndefValue::get(Ty), ElTy, CharUnits::One()), E->getType());
}
@@ -2039,6 +2158,14 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
llvm::Value *Vec = Builder.CreateLoad(LV.getExtVectorAddress(),
LV.isVolatileQualified());
+ // HLSL allows treating scalars as one-element vectors. Converting the scalar
+ // IR value to a vector here allows the rest of codegen to behave as normal.
+ if (getLangOpts().HLSL && !Vec->getType()->isVectorTy()) {
+ llvm::Type *DstTy = llvm::FixedVectorType::get(Vec->getType(), 1);
+ llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
+ Vec = Builder.CreateInsertElement(DstTy, Vec, Zero, "cast.splat");
+ }
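+ // Illustrative sketch: an HLSL component access on a scalar, e.g. 'f.x'
+ // with 'float f', takes this path; 'f' is loaded and rebuilt as a
+ // <1 x float> so the extraction logic below applies unchanged.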
+
const llvm::Constant *Elts = LV.getExtVectorElts();
// If the result of the expression is a non-vector type, we must be extracting
@@ -2308,10 +2435,20 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
LValue Dst) {
+ // HLSL allows storing to scalar values through ExtVector component LValues.
+ // To support this we need to handle the case where the destination address is
+ // a scalar.
+ Address DstAddr = Dst.getExtVectorAddress();
+ if (!DstAddr.getElementType()->isVectorTy()) {
+ assert(!Dst.getType()->isVectorType() &&
+ "this should only occur for non-vector l-values");
+ Builder.CreateStore(Src.getScalarVal(), DstAddr, Dst.isVolatileQualified());
+ return;
+ }
+
// This access turns into a read/modify/write of the vector. Load the input
// value now.
- llvm::Value *Vec = Builder.CreateLoad(Dst.getExtVectorAddress(),
- Dst.isVolatileQualified());
+ llvm::Value *Vec = Builder.CreateLoad(DstAddr, Dst.isVolatileQualified());
const llvm::Constant *Elts = Dst.getExtVectorElts();
llvm::Value *SrcVal = Src.getScalarVal();
@@ -2359,7 +2496,8 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
llvm_unreachable("unexpected shorten vector length");
}
} else {
- // If the Src is a scalar (not a vector) it must be updating one element.
+ // If the Src is a scalar (not a vector) and the target is a vector, it
+ // must be updating one element.
unsigned InIdx = getAccessedFieldNo(0, Elts);
llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx);
Vec = Builder.CreateInsertElement(Vec, SrcVal, Elt);
@@ -2492,14 +2630,6 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E,
}
}
-static llvm::Value *
-EmitBitCastOfLValueToProperType(CodeGenFunction &CGF,
- llvm::Value *V, llvm::Type *IRType,
- StringRef Name = StringRef()) {
- unsigned AS = cast<llvm::PointerType>(V->getType())->getAddressSpace();
- return CGF.Builder.CreateBitCast(V, IRType->getPointerTo(AS), Name);
-}
-
static LValue EmitThreadPrivateVarDeclLValue(
CodeGenFunction &CGF, const VarDecl *VD, QualType T, Address Addr,
llvm::Type *RealVarTy, SourceLocation Loc) {
@@ -2600,7 +2730,6 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
V = CGF.Builder.CreateThreadLocalAddress(V);
llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType());
- V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy);
CharUnits Alignment = CGF.getContext().getDeclAlign(VD);
Address Addr(V, RealVarTy, Alignment);
// Emit reference to the private copy of the variable if it is an OpenMP
@@ -2627,19 +2756,6 @@ static llvm::Constant *EmitFunctionDeclPointer(CodeGenModule &CGM,
}
llvm::Constant *V = CGM.GetAddrOfFunction(GD);
- if (!FD->hasPrototype()) {
- if (const FunctionProtoType *Proto =
- FD->getType()->getAs<FunctionProtoType>()) {
- // Ugly case: for a K&R-style definition, the type of the definition
- // isn't the same as the type of a use. Correct for this with a
- // bitcast.
- QualType NoProtoType =
- CGM.getContext().getFunctionNoProtoType(Proto->getReturnType());
- NoProtoType = CGM.getContext().getPointerType(NoProtoType);
- V = llvm::ConstantExpr::getBitCast(V,
- CGM.getTypes().ConvertType(NoProtoType));
- }
- }
return V;
}
@@ -2654,9 +2770,8 @@ static LValue EmitFunctionDeclLValue(CodeGenFunction &CGF, const Expr *E,
static LValue EmitCapturedFieldLValue(CodeGenFunction &CGF, const FieldDecl *FD,
llvm::Value *ThisValue) {
- QualType TagType = CGF.getContext().getTagDeclType(FD->getParent());
- LValue LV = CGF.MakeNaturalAlignAddrLValue(ThisValue, TagType);
- return CGF.EmitLValueForField(LV, FD);
+
+ return CGF.EmitLValueForLambdaField(FD, ThisValue);
}
/// Named Registers are named metadata pointing to the register name
@@ -2692,8 +2807,7 @@ static LValue EmitGlobalNamedRegister(const VarDecl *VD, CodeGenModule &CGM) {
/// this context.
static bool canEmitSpuriousReferenceToVariable(CodeGenFunction &CGF,
const DeclRefExpr *E,
- const VarDecl *VD,
- bool IsConstant) {
+ const VarDecl *VD) {
// For a variable declared in an enclosing scope, do not emit a spurious
// reference even if we have a capture, as that will emit an unwarranted
// reference to our capture state, and will likely generate worse code than
@@ -2726,7 +2840,7 @@ static bool canEmitSpuriousReferenceToVariable(CodeGenFunction &CGF,
// We can emit a spurious reference only if the linkage implies that we'll
// be emitting a non-interposable symbol that will be retained until link
// time.
- switch (CGF.CGM.getLLVMLinkageVarDefinition(VD, IsConstant)) {
+ switch (CGF.CGM.getLLVMLinkageVarDefinition(VD)) {
case llvm::GlobalValue::ExternalLinkage:
case llvm::GlobalValue::LinkOnceODRLinkage:
case llvm::GlobalValue::WeakODRLinkage:
@@ -2757,7 +2871,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
// constant value directly instead.
if (E->isNonOdrUse() == NOUR_Constant &&
(VD->getType()->isReferenceType() ||
- !canEmitSpuriousReferenceToVariable(*this, E, VD, true))) {
+ !canEmitSpuriousReferenceToVariable(*this, E, VD))) {
VD->getAnyInitializer(VD);
llvm::Constant *Val = ConstantEmitter(*this).emitAbstract(
E->getLocation(), *VD->evaluateValue(), VD->getType());
@@ -2859,7 +2973,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
// some reason; most likely, because it's in an outer function.
} else if (VD->isStaticLocal()) {
llvm::Constant *var = CGM.getOrCreateStaticVarDecl(
- *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false));
+ *VD, CGM.getLLVMLinkageVarDefinition(VD));
addr = Address(
var, ConvertTypeForMem(VD->getType()), getContext().getDeclAlign(VD));
@@ -2943,9 +3057,20 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
return MakeAddrLValue(CGM.GetAddrOfMSGuidDecl(GD), T,
AlignmentSource::Decl);
- if (const auto *TPO = dyn_cast<TemplateParamObjectDecl>(ND))
- return MakeAddrLValue(CGM.GetAddrOfTemplateParamObject(TPO), T,
- AlignmentSource::Decl);
+ if (const auto *TPO = dyn_cast<TemplateParamObjectDecl>(ND)) {
+ auto ATPO = CGM.GetAddrOfTemplateParamObject(TPO);
+ auto AS = getLangASFromTargetAS(ATPO.getAddressSpace());
+
+ if (AS != T.getAddressSpace()) {
+ auto TargetAS = getContext().getTargetAddressSpace(T.getAddressSpace());
+ auto PtrTy = ATPO.getElementType()->getPointerTo(TargetAS);
+ auto ASC = getTargetHooks().performAddrSpaceCast(
+ CGM, ATPO.getPointer(), AS, T.getAddressSpace(), PtrTy);
+ ATPO = ConstantAddress(ASC, ATPO.getElementType(), ATPO.getAlignment());
+ }
+
+ return MakeAddrLValue(ATPO, T, AlignmentSource::Decl);
+ }
llvm_unreachable("Unhandled DeclRefExpr");
}
@@ -3421,8 +3546,7 @@ void CodeGenFunction::EmitCfiSlowPathCheck(
"__cfi_slowpath_diag",
llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy},
false));
- CheckCall = Builder.CreateCall(
- SlowPathFn, {TypeId, Ptr, Builder.CreateBitCast(InfoPtr, Int8PtrTy)});
+ CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr, InfoPtr});
} else {
SlowPathFn = CGM.getModule().getOrInsertFunction(
"__cfi_slowpath",
@@ -3445,14 +3569,12 @@ void CodeGenFunction::EmitCfiCheckStub() {
llvm::Function *F = llvm::Function::Create(
llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, false),
llvm::GlobalValue::WeakAnyLinkage, "__cfi_check", M);
+ F->setAlignment(llvm::Align(4096));
CGM.setDSOLocal(F);
llvm::BasicBlock *BB = llvm::BasicBlock::Create(Ctx, "entry", F);
- // FIXME: consider emitting an intrinsic call like
- // call void @llvm.cfi_check(i64 %0, i8* %1, i8* %2)
- // which can be lowered in CrossDSOCFI pass to the actual contents of
- // __cfi_check. This would allow inlining of __cfi_check calls.
- llvm::CallInst::Create(
- llvm::Intrinsic::getDeclaration(M, llvm::Intrinsic::trap), "", BB);
+ // The CrossDSOCFI pass is not executed if there is no executable code.
+ SmallVector<llvm::Value*> Args{F->getArg(2), F->getArg(1)};
+ llvm::CallInst::Create(M->getFunction("__cfi_check_fail"), Args, "", BB);
llvm::ReturnInst::Create(Ctx, nullptr, BB);
}
@@ -3467,9 +3589,9 @@ void CodeGenFunction::EmitCfiCheckFail() {
SanitizerScope SanScope(this);
FunctionArgList Args;
ImplicitParamDecl ArgData(getContext(), getContext().VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
ImplicitParamDecl ArgAddr(getContext(), getContext().VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.push_back(&ArgData);
Args.push_back(&ArgAddr);
@@ -3570,17 +3692,28 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked,
// check-type per function to save on code size.
if (TrapBBs.size() <= CheckHandlerID)
TrapBBs.resize(CheckHandlerID + 1);
+
llvm::BasicBlock *&TrapBB = TrapBBs[CheckHandlerID];
- if (!CGM.getCodeGenOpts().OptimizationLevel || !TrapBB ||
- (CurCodeDecl && CurCodeDecl->hasAttr<OptimizeNoneAttr>())) {
+ if (!ClSanitizeDebugDeoptimization &&
+ CGM.getCodeGenOpts().OptimizationLevel && TrapBB &&
+ (!CurCodeDecl || !CurCodeDecl->hasAttr<OptimizeNoneAttr>())) {
+ auto Call = TrapBB->begin();
+ assert(isa<llvm::CallInst>(Call) && "Expected call in trap BB");
+
+ Call->applyMergedLocation(Call->getDebugLoc(),
+ Builder.getCurrentDebugLocation());
+ Builder.CreateCondBr(Checked, Cont, TrapBB);
+ } else {
TrapBB = createBasicBlock("trap");
Builder.CreateCondBr(Checked, Cont, TrapBB);
EmitBlock(TrapBB);
- llvm::CallInst *TrapCall =
- Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::ubsantrap),
- llvm::ConstantInt::get(CGM.Int8Ty, CheckHandlerID));
+ llvm::CallInst *TrapCall = Builder.CreateCall(
+ CGM.getIntrinsic(llvm::Intrinsic::ubsantrap),
+ llvm::ConstantInt::get(CGM.Int8Ty, ClSanitizeDebugDeoptimization
+ ? TrapBB->getParent()->size()
+ : CheckHandlerID));
if (!CGM.getCodeGenOpts().TrapFuncName.empty()) {
auto A = llvm::Attribute::get(getLLVMContext(), "trap-func-name",
@@ -3590,13 +3723,6 @@ void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked,
TrapCall->setDoesNotReturn();
TrapCall->setDoesNotThrow();
Builder.CreateUnreachable();
- } else {
- auto Call = TrapBB->begin();
- assert(isa<llvm::CallInst>(Call) && "Expected call in trap BB");
-
- Call->applyMergedLocation(Call->getDebugLoc(),
- Builder.getCurrentDebugLocation());
- Builder.CreateCondBr(Checked, Cont, TrapBB);
}
EmitBlock(Cont);
@@ -3707,6 +3833,33 @@ static QualType getFixedSizeElementType(const ASTContext &ctx,
return eltType;
}
+static bool hasBPFPreserveStaticOffset(const RecordDecl *D) {
+ return D && D->hasAttr<BPFPreserveStaticOffsetAttr>();
+}
+
+static bool hasBPFPreserveStaticOffset(const Expr *E) {
+ if (!E)
+ return false;
+ QualType PointeeType = E->getType()->getPointeeType();
+ if (PointeeType.isNull())
+ return false;
+ if (const auto *BaseDecl = PointeeType->getAsRecordDecl())
+ return hasBPFPreserveStaticOffset(BaseDecl);
+ return false;
+}
+
+// Wraps Addr with a call to llvm.preserve.static.offset intrinsic.
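+// Illustrative IR sketch (BPF only): an access such as
+//   %v = load i32, ptr %p
+// becomes
+//   %q = call ptr @llvm.preserve.static.offset(ptr %p)
+//   %v = load i32, ptr %q
+// so later BPF passes can keep the record's field offsets static.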
+static Address wrapWithBPFPreserveStaticOffset(CodeGenFunction &CGF,
+ Address &Addr) {
+ if (!CGF.getTarget().getTriple().isBPF())
+ return Addr;
+
+ llvm::Function *Fn =
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::preserve_static_offset);
+ llvm::CallInst *Call = CGF.Builder.CreateCall(Fn, {Addr.getPointer()});
+ return Address(Call, Addr.getElementType(), Addr.getAlignment());
+}
+
/// Given an array base, check whether its member access belongs to a record
/// with preserve_access_index attribute or not.
static bool IsPreserveAIArrayBase(CodeGenFunction &CGF, const Expr *ArrayBase) {
@@ -3768,6 +3921,9 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr,
CharUnits eltAlign =
getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize);
+ if (hasBPFPreserveStaticOffset(Base))
+ addr = wrapWithBPFPreserveStaticOffset(CGF, addr);
+
llvm::Value *eltPtr;
auto LastIndex = dyn_cast<llvm::ConstantInt>(indices.back());
if (!LastIndex ||
@@ -4269,17 +4425,38 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) {
/// Given that we are currently emitting a lambda, emit an l-value for
/// one of its members.
-LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field) {
- if (CurCodeDecl) {
- assert(cast<CXXMethodDecl>(CurCodeDecl)->getParent()->isLambda());
- assert(cast<CXXMethodDecl>(CurCodeDecl)->getParent() == Field->getParent());
+///
+LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field,
+ llvm::Value *ThisValue) {
+ bool HasExplicitObjectParameter = false;
+ if (const auto *MD = dyn_cast_if_present<CXXMethodDecl>(CurCodeDecl)) {
+ HasExplicitObjectParameter = MD->isExplicitObjectMemberFunction();
+ assert(MD->getParent()->isLambda());
+ assert(MD->getParent() == Field->getParent());
+ }
+ LValue LambdaLV;
+ if (HasExplicitObjectParameter) {
+ const VarDecl *D = cast<CXXMethodDecl>(CurCodeDecl)->getParamDecl(0);
+ auto It = LocalDeclMap.find(D);
+ assert(It != LocalDeclMap.end() && "explicit parameter not loaded?");
+ Address AddrOfExplicitObject = It->getSecond();
+ if (D->getType()->isReferenceType())
+ LambdaLV = EmitLoadOfReferenceLValue(AddrOfExplicitObject, D->getType(),
+ AlignmentSource::Decl);
+ else
+ LambdaLV = MakeNaturalAlignAddrLValue(AddrOfExplicitObject.getPointer(),
+ D->getType().getNonReferenceType());
+ } else {
+ QualType LambdaTagType = getContext().getTagDeclType(Field->getParent());
+ LambdaLV = MakeNaturalAlignAddrLValue(ThisValue, LambdaTagType);
}
- QualType LambdaTagType =
- getContext().getTagDeclType(Field->getParent());
- LValue LambdaLV = MakeNaturalAlignAddrLValue(CXXABIThisValue, LambdaTagType);
return EmitLValueForField(LambdaLV, Field);
}
+LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field) {
+ return EmitLValueForLambdaField(Field, CXXABIThisValue);
+}
+
/// Get the field index in the debug info. The debug info structure/union
/// will ignore the unnamed bitfields.
unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec,
@@ -4375,6 +4552,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
Address Addr = base.getAddress(*this);
unsigned Idx = RL.getLLVMFieldNo(field);
const RecordDecl *rec = field->getParent();
+ if (hasBPFPreserveStaticOffset(rec))
+ Addr = wrapWithBPFPreserveStaticOffset(*this, Addr);
if (!UseVolatile) {
if (!IsInPreservedAIRegion &&
(!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>())) {
@@ -4447,6 +4626,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
}
Address addr = base.getAddress(*this);
+ if (hasBPFPreserveStaticOffset(rec))
+ addr = wrapWithBPFPreserveStaticOffset(*this, addr);
if (auto *ClassDef = dyn_cast<CXXRecordDecl>(rec)) {
if (CGM.getCodeGenOpts().StrictVTablePointers &&
ClassDef->isDynamicClass()) {
@@ -4616,7 +4797,7 @@ std::optional<LValue> HandleConditionalOperatorLValueSimpleCase(
if (auto *ThrowExpr = dyn_cast<CXXThrowExpr>(Live->IgnoreParens())) {
CGF.EmitCXXThrowExpr(ThrowExpr);
llvm::Type *ElemTy = CGF.ConvertType(Dead->getType());
- llvm::Type *Ty = llvm::PointerType::getUnqual(ElemTy);
+ llvm::Type *Ty = CGF.UnqualPtrTy;
return CGF.MakeAddrLValue(
Address(llvm::UndefValue::get(Ty), ElemTy, CharUnits::One()),
Dead->getType());
@@ -4751,7 +4932,6 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
case CK_IntegralToPointer:
case CK_PointerToIntegral:
case CK_PointerToBoolean:
- case CK_VectorSplat:
case CK_IntegralCast:
case CK_BooleanToSignedIntegral:
case CK_IntegralToBoolean:
@@ -4819,6 +4999,9 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
// bound and change the IR type.
// FIXME: Once pointee types are removed from IR, remove this.
LValue LV = EmitLValue(E->getSubExpr());
+ // Propagate the volatile qualifier to the LValue, if it exists in E.
+ if (E->changesVolatileQualification())
+ LV.getQuals() = E->getType().getQualifiers();
if (LV.isSimple()) {
Address V = LV.getAddress(*this);
if (V.isValid()) {
@@ -4913,6 +5096,13 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
}
case CK_ZeroToOCLOpaqueType:
llvm_unreachable("NULL to OpenCL opaque type lvalue cast is not valid");
+
+ case CK_VectorSplat: {
+ // LValue results of vector splats are only supported in HLSL.
+ if (!getLangOpts().HLSL)
+ return EmitUnsupportedLValue(E, "unexpected cast lvalue");
+ return EmitLValue(E->getSubExpr());
+ }
}
llvm_unreachable("Unhandled lvalue cast kind?");
@@ -4991,9 +5181,12 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E,
if (const auto *CE = dyn_cast<CUDAKernelCallExpr>(E))
return EmitCUDAKernelCallExpr(CE, ReturnValue);
+ // A CXXOperatorCallExpr is created even for explicit object methods, but
+ // these should be treated like static function calls.
if (const auto *CE = dyn_cast<CXXOperatorCallExpr>(E))
- if (const CXXMethodDecl *MD =
- dyn_cast_or_null<CXXMethodDecl>(CE->getCalleeDecl()))
+ if (const auto *MD =
+ dyn_cast_if_present<CXXMethodDecl>(CE->getCalleeDecl());
+ MD && MD->isImplicitObjectMemberFunction())
return EmitCXXOperatorMemberCallExpr(CE, MD, ReturnValue);
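A hedged sketch of the case now excluded: an explicit object operator() still parses as a CXXOperatorCallExpr, but the object travels as an ordinary first argument, so it falls through to the generic call path below:

    struct Adder {
      int base = 10;
      // C++23 explicit object parameter: no implicit 'this'.
      int operator()(this const Adder &self, int x) { return self.base + x; }
    };

    int demo() {
      Adder a;
      return a(5); // lowered like a non-member call
    }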
CGCallee callee = EmitCallee(E->getCallee());
@@ -5365,8 +5558,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
AlignedCalleePtr = CalleePtr;
}
- llvm::Value *CalleePrefixStruct = Builder.CreateBitCast(
- AlignedCalleePtr, llvm::PointerType::getUnqual(PrefixStructTy));
+ llvm::Value *CalleePrefixStruct = AlignedCalleePtr;
llvm::Value *CalleeSigPtr =
Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, -1, 0);
llvm::Value *CalleeSig =
@@ -5413,9 +5605,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD);
llvm::Value *CalleePtr = Callee.getFunctionPointer();
- llvm::Value *CastedCallee = Builder.CreateBitCast(CalleePtr, Int8PtrTy);
llvm::Value *TypeTest = Builder.CreateCall(
- CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedCallee, TypeId});
+ CGM.getIntrinsic(llvm::Intrinsic::type_test), {CalleePtr, TypeId});
auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD);
llvm::Constant *StaticData[] = {
@@ -5425,18 +5616,17 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
};
if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) {
EmitCfiSlowPathCheck(SanitizerKind::CFIICall, TypeTest, CrossDsoTypeId,
- CastedCallee, StaticData);
+ CalleePtr, StaticData);
} else {
EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIICall),
SanitizerHandler::CFICheckFail, StaticData,
- {CastedCallee, llvm::UndefValue::get(IntPtrTy)});
+ {CalleePtr, llvm::UndefValue::get(IntPtrTy)});
}
}
CallArgList Args;
if (Chain)
- Args.add(RValue::get(Builder.CreateBitCast(Chain, CGM.VoidPtrTy)),
- CGM.getContext().VoidPtrTy);
+ Args.add(RValue::get(Chain), CGM.getContext().VoidPtrTy);
// C++17 requires that we evaluate arguments to a call using assignment syntax
// right-to-left, and that we evaluate arguments to certain other operators
@@ -5507,10 +5697,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
isa<CUDAKernelCallExpr>(E) &&
(!TargetDecl || !isa<FunctionDecl>(TargetDecl))) {
llvm::Value *Handle = Callee.getFunctionPointer();
- auto *Cast =
- Builder.CreateBitCast(Handle, Handle->getType()->getPointerTo());
auto *Stub = Builder.CreateLoad(
- Address(Cast, Handle->getType(), CGM.getPointerAlign()));
+ Address(Handle, Handle->getType(), CGM.getPointerAlign()));
Callee.setFunctionPointer(Stub);
}
llvm::CallBase *CallOrInvoke = nullptr;
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 4d3f3e9603d9..98ae56e2df88 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -41,7 +41,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, GlobalDecl GD,
assert(CE == nullptr || isa<CXXMemberCallExpr>(CE) ||
isa<CXXOperatorCallExpr>(CE));
- assert(MD->isInstance() &&
+ assert(MD->isImplicitObjectMemberFunction() &&
"Trying to emit a member or operator call expr on a static method!");
// Push the this ptr.
@@ -66,7 +66,12 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, GlobalDecl GD,
Args.addFrom(*RtlArgs);
} else if (CE) {
// Special case: skip first argument of CXXOperatorCall (it is "this").
- unsigned ArgsToSkip = isa<CXXOperatorCallExpr>(CE) ? 1 : 0;
+ unsigned ArgsToSkip = 0;
+ if (const auto *Op = dyn_cast<CXXOperatorCallExpr>(CE)) {
+ if (const auto *M = dyn_cast<CXXMethodDecl>(Op->getCalleeDecl()))
+ ArgsToSkip =
+ static_cast<unsigned>(!M->isExplicitObjectMemberFunction());
+ }
CGF.EmitCallArgs(Args, FPT, drop_begin(CE->arguments(), ArgsToSkip),
CE->getDirectCallee());
} else {
@@ -484,7 +489,7 @@ RValue
CodeGenFunction::EmitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E,
const CXXMethodDecl *MD,
ReturnValueSlot ReturnValue) {
- assert(MD->isInstance() &&
+ assert(MD->isImplicitObjectMemberFunction() &&
"Trying to emit a member call expr on a static method!");
return EmitCXXMemberOrOperatorMemberCallExpr(
E, MD, ReturnValue, /*HasQualifier=*/false, /*Qualifier=*/nullptr,
@@ -595,12 +600,12 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E,
// already zeroed.
if (E->requiresZeroInitialization() && !Dest.isZeroed()) {
switch (E->getConstructionKind()) {
- case CXXConstructExpr::CK_Delegating:
- case CXXConstructExpr::CK_Complete:
+ case CXXConstructionKind::Delegating:
+ case CXXConstructionKind::Complete:
EmitNullInitialization(Dest.getAddress(), E->getType());
break;
- case CXXConstructExpr::CK_VirtualBase:
- case CXXConstructExpr::CK_NonVirtualBase:
+ case CXXConstructionKind::VirtualBase:
+ case CXXConstructionKind::NonVirtualBase:
EmitNullBaseClassInitialization(*this, Dest.getAddress(),
CD->getParent());
break;
@@ -636,21 +641,21 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E,
bool Delegating = false;
switch (E->getConstructionKind()) {
- case CXXConstructExpr::CK_Delegating:
+ case CXXConstructionKind::Delegating:
// We should be emitting a constructor; GlobalDecl will assert this
Type = CurGD.getCtorType();
Delegating = true;
break;
- case CXXConstructExpr::CK_Complete:
+ case CXXConstructionKind::Complete:
Type = Ctor_Complete;
break;
- case CXXConstructExpr::CK_VirtualBase:
+ case CXXConstructionKind::VirtualBase:
ForVirtualBase = true;
[[fallthrough]];
- case CXXConstructExpr::CK_NonVirtualBase:
+ case CXXConstructionKind::NonVirtualBase:
Type = Ctor_Base;
}
@@ -1101,9 +1106,7 @@ void CodeGenFunction::EmitNewArrayInitializer(
// element. TODO: some of these stores can be trivially
// observed to be unnecessary.
if (EndOfInit.isValid()) {
- auto FinishedPtr =
- Builder.CreateBitCast(CurPtr.getPointer(), BeginPtr.getType());
- Builder.CreateStore(FinishedPtr, EndOfInit);
+ Builder.CreateStore(CurPtr.getPointer(), EndOfInit);
}
// FIXME: If the last initializer is an incomplete initializer list for
// an array, and we have an array filler, we can fold together the two
@@ -2195,11 +2198,19 @@ static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E,
llvm::Value *CodeGenFunction::EmitCXXTypeidExpr(const CXXTypeidExpr *E) {
llvm::Type *PtrTy = llvm::PointerType::getUnqual(getLLVMContext());
+ LangAS GlobAS = CGM.GetGlobalVarAddressSpace(nullptr);
+
+ auto MaybeASCast = [=](auto &&TypeInfo) {
+ if (GlobAS == LangAS::Default)
+ return TypeInfo;
+ return getTargetHooks().performAddrSpaceCast(CGM, TypeInfo, GlobAS,
+ LangAS::Default, PtrTy);
+ };
if (E->isTypeOperand()) {
llvm::Constant *TypeInfo =
CGM.GetAddrOfRTTIDescriptor(E->getTypeOperand(getContext()));
- return TypeInfo;
+ return MaybeASCast(TypeInfo);
}
// C++ [expr.typeid]p2:
@@ -2212,7 +2223,7 @@ llvm::Value *CodeGenFunction::EmitCXXTypeidExpr(const CXXTypeidExpr *E) {
return EmitTypeidFromVTable(*this, E->getExprOperand(), PtrTy);
QualType OperandTy = E->getExprOperand()->getType();
- return CGM.GetAddrOfRTTIDescriptor(OperandTy);
+ return MaybeASCast(CGM.GetAddrOfRTTIDescriptor(OperandTy));
}
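A short sketch of why MaybeASCast exists, assuming a target such as AMDGPU whose global variables live in a non-default address space:

    #include <typeinfo>

    // Sketch only: the RTTI descriptor is a global, so its address is in the
    // global AS on such targets; typeid must still yield a reference in the
    // default (generic) address space, hence the addrspacecast.
    const std::type_info &int_info() { return typeid(int); }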
static llvm::Value *EmitDynamicCastToNull(CodeGenFunction &CGF,
diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp
index 2dd1a991ec97..f3cbd1d0451e 100644
--- a/clang/lib/CodeGen/CGExprComplex.cpp
+++ b/clang/lib/CodeGen/CGExprComplex.cpp
@@ -177,11 +177,15 @@ public:
ComplexPairTy VisitImplicitCastExpr(ImplicitCastExpr *E) {
// Unlike for scalars, we don't have to worry about function->ptr demotion
// here.
+ if (E->changesVolatileQualification())
+ return EmitLoadOfLValue(E);
return EmitCast(E->getCastKind(), E->getSubExpr(), E->getType());
}
ComplexPairTy VisitCastExpr(CastExpr *E) {
if (const auto *ECE = dyn_cast<ExplicitCastExpr>(E))
CGF.CGM.EmitExplicitCastExprType(ECE, &CGF);
+ if (E->changesVolatileQualification())
+ return EmitLoadOfLValue(E);
return EmitCast(E->getCastKind(), E->getSubExpr(), E->getType());
}
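A cautious sketch of the pattern these checks target (assumed from the changesVolatileQualification guard): when a cast changes volatile qualification, the operand is re-read via EmitLoadOfLValue so the volatile access is not dropped:

    volatile _Complex double vc;

    void touch() {
      // The C-style cast drops volatile; the emitter performs a genuine
      // l-value load instead of just visiting the sub-expression.
      _Complex double d = (_Complex double)vc;
      (void)d;
    }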
ComplexPairTy VisitCallExpr(const CallExpr *E);
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index 353ee56839f3..604e3958161d 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -25,6 +25,7 @@
#include "clang/Basic/Builtins.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -934,7 +935,7 @@ tryEmitGlobalCompoundLiteral(ConstantEmitter &emitter,
auto GV = new llvm::GlobalVariable(
CGM.getModule(), C->getType(),
- CGM.isTypeConstant(E->getType(), true, false),
+ E->getType().isConstantStorage(CGM.getContext(), true, false),
llvm::GlobalValue::InternalLinkage, C, ".compoundliteral", nullptr,
llvm::GlobalVariable::NotThreadLocal,
CGM.getContext().getTargetAddressSpace(addressSpace));
@@ -1127,9 +1128,36 @@ public:
case CK_ConstructorConversion:
return Visit(subExpr, destType);
+ case CK_ArrayToPointerDecay:
+ if (const auto *S = dyn_cast<StringLiteral>(subExpr))
+ return CGM.GetAddrOfConstantStringFromLiteral(S).getPointer();
+ return nullptr;
+ case CK_NullToPointer:
+ if (Visit(subExpr, destType))
+ return CGM.EmitNullConstant(destType);
+ return nullptr;
+
case CK_IntToOCLSampler:
llvm_unreachable("global sampler variables are not generated");
+ case CK_IntegralCast: {
+ QualType FromType = subExpr->getType();
+ // See also HandleIntToIntCast in ExprConstant.cpp
+ if (FromType->isIntegerType())
+ if (llvm::Constant *C = Visit(subExpr, FromType))
+ if (auto *CI = dyn_cast<llvm::ConstantInt>(C)) {
+ unsigned SrcWidth = CGM.getContext().getIntWidth(FromType);
+ unsigned DstWidth = CGM.getContext().getIntWidth(destType);
+ if (DstWidth == SrcWidth)
+ return CI;
+ llvm::APInt A = FromType->isSignedIntegerType()
+ ? CI->getValue().sextOrTrunc(DstWidth)
+ : CI->getValue().zextOrTrunc(DstWidth);
+ return llvm::ConstantInt::get(CGM.getLLVMContext(), A);
+ }
+ return nullptr;
+ }
+
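A small sketch of initializers this lets ConstExprEmitter fold directly, mirroring HandleIntToIntCast, instead of bailing out to the full constant evaluator:

    // CK_IntegralCast on a ConstantInt: same width is returned unchanged;
    // otherwise the value is sign- or zero-adjusted based on the source type.
    short narrowed = (short)0x12345;   // truncated to 16 bits
    long long widened = (long long)-1; // source is signed, so sign-extended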
case CK_Dependent: llvm_unreachable("saw dependent cast!");
case CK_BuiltinFnToFnPtr:
@@ -1164,7 +1192,6 @@ public:
case CK_CPointerToObjCPointerCast:
case CK_BlockPointerToObjCPointerCast:
case CK_AnyPointerToBlockPointerCast:
- case CK_ArrayToPointerDecay:
case CK_FunctionToPointerDecay:
case CK_BaseToDerived:
case CK_DerivedToBase:
@@ -1183,8 +1210,6 @@ public:
case CK_IntegralComplexToFloatingComplex:
case CK_PointerToIntegral:
case CK_PointerToBoolean:
- case CK_NullToPointer:
- case CK_IntegralCast:
case CK_BooleanToSignedIntegral:
case CK_IntegralToPointer:
case CK_IntegralToBoolean:
@@ -1215,6 +1240,10 @@ public:
return Visit(E->getSubExpr(), T);
}
+ llvm::Constant *VisitIntegerLiteral(IntegerLiteral *I, QualType T) {
+ return llvm::ConstantInt::get(CGM.getLLVMContext(), I->getValue());
+ }
+
llvm::Constant *EmitArrayInitialization(InitListExpr *ILE, QualType T) {
auto *CAT = CGM.getContext().getAsConstantArrayType(ILE->getType());
assert(CAT && "can't emit array init for non-constant-bound array");
@@ -1352,6 +1381,13 @@ public:
return Visit(E->getSubExpr(), T);
}
+ llvm::Constant *VisitUnaryMinus(UnaryOperator *U, QualType T) {
+ if (llvm::Constant *C = Visit(U->getSubExpr(), T))
+ if (auto *CI = dyn_cast<llvm::ConstantInt>(C))
+ return llvm::ConstantInt::get(CGM.getLLVMContext(), -CI->getValue());
+ return nullptr;
+ }
+
// Utility methods
llvm::Type *ConvertType(QualType T) {
return CGM.getTypes().ConvertType(T);
@@ -1594,13 +1630,8 @@ namespace {
IndexValues[i] = llvm::ConstantInt::get(CGM.Int32Ty, Indices[i]);
}
- // Form a GEP and then bitcast to the placeholder type so that the
- // replacement will succeed.
- llvm::Constant *location =
- llvm::ConstantExpr::getInBoundsGetElementPtr(BaseValueTy,
- Base, IndexValues);
- location = llvm::ConstantExpr::getBitCast(location,
- placeholder->getType());
+ llvm::Constant *location = llvm::ConstantExpr::getInBoundsGetElementPtr(
+ BaseValueTy, Base, IndexValues);
Locations.insert({placeholder, location});
}
@@ -1726,7 +1757,10 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM,
// Zero-extend bool.
if (C->getType()->isIntegerTy(1) && !destType->isBitIntType()) {
llvm::Type *boolTy = CGM.getTypes().ConvertTypeForMem(destType);
- return llvm::ConstantExpr::getZExt(C, boolTy);
+ llvm::Constant *Res = llvm::ConstantFoldCastOperand(
+ llvm::Instruction::ZExt, C, boolTy, CGM.getDataLayout());
+ assert(Res && "Constant folding must succeed");
+ return Res;
}
return C;
@@ -1736,9 +1770,10 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E,
QualType destType) {
assert(!destType->isVoidType() && "can't emit a void constant");
- if (llvm::Constant *C =
- ConstExprEmitter(*this).Visit(const_cast<Expr *>(E), destType))
- return C;
+ if (!destType->isReferenceType())
+ if (llvm::Constant *C =
+ ConstExprEmitter(*this).Visit(const_cast<Expr *>(E), destType))
+ return C;
Expr::EvalResult Result;
@@ -1826,10 +1861,7 @@ private:
if (!hasNonZeroOffset())
return C;
- llvm::Type *origPtrTy = C->getType();
- C = llvm::ConstantExpr::getGetElementPtr(CGM.Int8Ty, C, getOffset());
- C = llvm::ConstantExpr::getPointerCast(C, origPtrTy);
- return C;
+ return llvm::ConstantExpr::getGetElementPtr(CGM.Int8Ty, C, getOffset());
}
};
@@ -1890,8 +1922,9 @@ ConstantLValueEmitter::tryEmitAbsolute(llvm::Type *destTy) {
// FIXME: signedness depends on the original integer type.
auto intptrTy = CGM.getDataLayout().getIntPtrType(destPtrTy);
llvm::Constant *C;
- C = llvm::ConstantExpr::getIntegerCast(getOffset(), intptrTy,
- /*isSigned*/ false);
+ C = llvm::ConstantFoldIntegerCast(getOffset(), intptrTy, /*isSigned*/ false,
+ CGM.getDataLayout());
+ assert(C && "Must have folded, as Offset is a ConstantInt");
C = llvm::ConstantExpr::getIntToPtr(C, destPtrTy);
return C;
}
@@ -1918,7 +1951,7 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) {
if (VD->isLocalVarDecl()) {
return CGM.getOrCreateStaticVarDecl(
- *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false));
+ *VD, CGM.getLLVMLinkageVarDefinition(VD));
}
}
}
@@ -1996,8 +2029,6 @@ ConstantLValue
ConstantLValueEmitter::VisitAddrLabelExpr(const AddrLabelExpr *E) {
assert(Emitter.CGF && "Invalid address of label expression outside function");
llvm::Constant *Ptr = Emitter.CGF->GetAddrOfLabel(E->getLabel());
- Ptr = llvm::ConstantExpr::getBitCast(Ptr,
- CGM.getTypes().ConvertType(E->getType()));
return Ptr;
}
@@ -2112,6 +2143,9 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value,
Inits[I] = llvm::ConstantInt::get(CGM.getLLVMContext(), Elt.getInt());
else if (Elt.isFloat())
Inits[I] = llvm::ConstantFP::get(CGM.getLLVMContext(), Elt.getFloat());
+ else if (Elt.isIndeterminate())
+ Inits[I] = llvm::UndefValue::get(CGM.getTypes().ConvertType(
+ DestType->castAs<VectorType>()->getElementType()));
else
llvm_unreachable("unsupported vector element type");
}
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index fe1a59b21f38..41ad2ddac30d 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1798,7 +1798,7 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
// careful, because the base of a vector subscript is occasionally an rvalue,
// so we can't get it as an lvalue.
if (!E->getBase()->getType()->isVectorType() &&
- !E->getBase()->getType()->isVLSTBuiltinType())
+ !E->getBase()->getType()->isSveVLSBuiltinType())
return EmitLoadOfLValue(E);
// Handle the vector case. The base must be a vector, the index must be an
@@ -2084,11 +2084,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
Value *Src = Visit(const_cast<Expr*>(E));
llvm::Type *SrcTy = Src->getType();
llvm::Type *DstTy = ConvertType(DestTy);
- if (SrcTy->isPtrOrPtrVectorTy() && DstTy->isPtrOrPtrVectorTy() &&
- SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace()) {
- llvm_unreachable("wrong cast for pointers in different address spaces"
- "(must be an address space cast)!");
- }
+ assert(
+ (!SrcTy->isPtrOrPtrVectorTy() || !DstTy->isPtrOrPtrVectorTy() ||
+ SrcTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace()) &&
+ "Address-space cast must be used to convert address spaces");
if (CGF.SanOpts.has(SanitizerKind::CFIUnrelatedCast)) {
if (auto *PT = DestTy->getAs<PointerType>()) {
@@ -2225,16 +2224,8 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
return Visit(const_cast<Expr*>(E));
case CK_NoOp: {
- llvm::Value *V = Visit(const_cast<Expr *>(E));
- if (V) {
- // CK_NoOp can model a pointer qualification conversion, which can remove
- // an array bound and change the IR type.
- // FIXME: Once pointee types are removed from IR, remove this.
- llvm::Type *T = ConvertType(DestTy);
- if (T != V->getType())
- V = Builder.CreateBitCast(V, T);
- }
- return V;
+ return CE->changesVolatileQualification() ? EmitLoadOfLValue(CE)
+ : Visit(const_cast<Expr *>(E));
}
case CK_BaseToDerived: {
@@ -2580,7 +2571,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
// For atomic bool increment, we just store true and return it for
// preincrement, do an atomic swap with true for postincrement
return Builder.CreateAtomicRMW(
- llvm::AtomicRMWInst::Xchg, LV.getPointer(CGF), True,
+ llvm::AtomicRMWInst::Xchg, LV.getAddress(CGF), True,
llvm::AtomicOrdering::SequentiallyConsistent);
}
// Special case for atomic increment / decrement on integers, emit
@@ -2598,7 +2589,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
llvm::Value *amt = CGF.EmitToMemory(
llvm::ConstantInt::get(ConvertType(type), 1, true), type);
llvm::Value *old =
- Builder.CreateAtomicRMW(aop, LV.getPointer(CGF), amt,
+ Builder.CreateAtomicRMW(aop, LV.getAddress(CGF), amt,
llvm::AtomicOrdering::SequentiallyConsistent);
return isPre ? Builder.CreateBinOp(op, old, amt) : old;
}
@@ -2764,8 +2755,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
amt = llvm::ConstantFP::get(VMContext,
llvm::APFloat(static_cast<double>(amount)));
else {
- // Remaining types are Half, LongDouble, __ibm128 or __float128. Convert
- // from float.
+ // Remaining types are Half, Bfloat16, LongDouble, __ibm128 or __float128.
+ // Convert from float.
llvm::APFloat F(static_cast<float>(amount));
bool ignored;
const llvm::fltSemantics *FS;
@@ -2775,6 +2766,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
FS = &CGF.getTarget().getFloat128Format();
else if (value->getType()->isHalfTy())
FS = &CGF.getTarget().getHalfFormat();
+ else if (value->getType()->isBFloatTy())
+ FS = &CGF.getTarget().getBFloat16Format();
else if (value->getType()->isPPC_FP128Ty())
FS = &CGF.getTarget().getIbm128Format();
else
@@ -2928,7 +2921,7 @@ Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) {
// Perform vector logical not on comparison with zero vector.
if (E->getType()->isVectorType() &&
E->getType()->castAs<VectorType>()->getVectorKind() ==
- VectorType::GenericVector) {
+ VectorKind::Generic) {
Value *Oper = Visit(E->getSubExpr());
Value *Zero = llvm::Constant::getNullValue(Oper->getType());
Value *Result;
@@ -3050,9 +3043,10 @@ Value *
ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
const UnaryExprOrTypeTraitExpr *E) {
QualType TypeToSize = E->getTypeOfArgument();
- if (E->getKind() == UETT_SizeOf) {
+ if (auto Kind = E->getKind();
+ Kind == UETT_SizeOf || Kind == UETT_DataSizeOf) {
if (const VariableArrayType *VAT =
- CGF.getContext().getAsVariableArrayType(TypeToSize)) {
+ CGF.getContext().getAsVariableArrayType(TypeToSize)) {
if (E->isArgumentType()) {
// sizeof(type) - make sure to emit the VLA size.
CGF.EmitVariablyModifiedType(TypeToSize);
@@ -3079,6 +3073,9 @@ ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
E->getTypeOfArgument()->getPointeeType()))
.getQuantity();
return llvm::ConstantInt::get(CGF.SizeTy, Alignment);
+ } else if (E->getKind() == UETT_VectorElements) {
+ auto *VecTy = cast<llvm::VectorType>(ConvertType(E->getTypeOfArgument()));
+ return Builder.CreateElementCount(CGF.SizeTy, VecTy->getElementCount());
}
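The new UETT_VectorElements branch backs __builtin_vectorelements; a minimal sketch:

    typedef int v4i __attribute__((vector_size(16)));

    unsigned lanes() {
      // Fixed-width vectors fold to a constant (4 here); scalable vector
      // types lower to an element count derived from llvm.vscale.
      return __builtin_vectorelements(v4i);
    }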
// If this isn't sizeof(vla), the result must be constant; use the constant
@@ -3317,7 +3314,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
E->getExprLoc()),
LHSTy);
Value *OldVal = Builder.CreateAtomicRMW(
- AtomicOp, LHSLV.getPointer(CGF), Amt,
+ AtomicOp, LHSLV.getAddress(CGF), Amt,
llvm::AtomicOrdering::SequentiallyConsistent);
// Since operation is atomic, the result type is guaranteed to be the
@@ -3688,8 +3685,8 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
index = CGF.Builder.CreateMul(index, objectSize);
- Value *result = CGF.Builder.CreateBitCast(pointer, CGF.VoidPtrTy);
- result = CGF.Builder.CreateGEP(CGF.Int8Ty, result, index, "add.ptr");
+ Value *result =
+ CGF.Builder.CreateGEP(CGF.Int8Ty, pointer, index, "add.ptr");
return CGF.Builder.CreateBitCast(result, pointer->getType());
}
@@ -3719,10 +3716,12 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF,
// Explicitly handle GNU void* and function pointer arithmetic extensions. The
// GNU void* casts amount to no-ops since our void* type is i8*, but this is
// future proof.
+ llvm::Type *elemTy;
if (elementType->isVoidType() || elementType->isFunctionType())
- return CGF.Builder.CreateGEP(CGF.Int8Ty, pointer, index, "add.ptr");
+ elemTy = CGF.Int8Ty;
+ else
+ elemTy = CGF.ConvertTypeForMem(elementType);
- llvm::Type *elemTy = CGF.ConvertTypeForMem(elementType);
if (CGF.getLangOpts().isSignedOverflowDefined())
return CGF.Builder.CreateGEP(elemTy, pointer, index, "add.ptr");
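With opaque pointers the two branches now differ only in the element type fed to the GEP; a short sketch of the GNU extension the i8 branch serves:

    // GNU extension: arithmetic on void* (and on function pointers) is
    // byte-sized, so the GEP is emitted with an i8 element type.
    void *advance(void *p, long n) { return p + n; }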
@@ -3872,6 +3871,14 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
}
}
+ // For vector and matrix adds, try to fold into an fmuladd.
+ if (op.LHS->getType()->isFPOrFPVectorTy()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
+ // Try to form an fmuladd.
+ if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder))
+ return FMulAdd;
+ }
+
if (op.Ty->isConstantMatrixType()) {
llvm::MatrixBuilder MB(Builder);
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
@@ -3885,10 +3892,6 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
if (op.LHS->getType()->isFPOrFPVectorTy()) {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
- // Try to form an fmuladd.
- if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder))
- return FMulAdd;
-
return Builder.CreateFAdd(op.LHS, op.RHS, "add");
}
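Hoisting the tryEmitFMulAdd check above the matrix special case lets contraction apply to vector and matrix operands as well; a sketch with contraction enabled:

    float contract(float a, float b, float c) {
    #pragma clang fp contract(fast)
      // With contraction on, a * b + c may be emitted as llvm.fmuladd.
      return a * b + c;
    }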
@@ -4022,6 +4025,14 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
}
}
+ // For vector and matrix subs, try to fold into an fmuladd.
+ if (op.LHS->getType()->isFPOrFPVectorTy()) {
+ CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
+ // Try to form an fmuladd.
+ if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true))
+ return FMulAdd;
+ }
+
if (op.Ty->isConstantMatrixType()) {
llvm::MatrixBuilder MB(Builder);
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
@@ -4035,9 +4046,6 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
if (op.LHS->getType()->isFPOrFPVectorTy()) {
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures);
- // Try to form an fmuladd.
- if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true))
- return FMulAdd;
return Builder.CreateFSub(op.LHS, op.RHS, "sub");
}
@@ -4856,7 +4864,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
}
if (condExpr->getType()->isVectorType() ||
- condExpr->getType()->isVLSTBuiltinType()) {
+ condExpr->getType()->isSveVLSBuiltinType()) {
CGF.incrementProfileCounter(E);
llvm::Value *CondV = CGF.EmitScalarExpr(condExpr);
diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index 75fb06de9384..e465789a003e 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -23,8 +23,8 @@ using namespace CodeGen;
namespace {
llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
- llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
- llvm::Type::getInt8PtrTy(M.getContext())};
+ llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()),
+ llvm::PointerType::getUnqual(M.getContext())};
llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
@@ -45,8 +45,8 @@ llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) {
const char *Name = "__llvm_omp_vprintf";
llvm::Module &M = CGM.getModule();
- llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()),
- llvm::Type::getInt8PtrTy(M.getContext()),
+ llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()),
+ llvm::PointerType::getUnqual(M.getContext()),
llvm::Type::getInt32Ty(M.getContext())};
llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
@@ -99,8 +99,9 @@ packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) {
// Construct and fill the args buffer that we'll pass to vprintf.
if (Args.size() <= 1) {
// If there are no args, pass a null pointer and size 0
- llvm::Value * BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
- return {BufferPtr, llvm::TypeSize::Fixed(0)};
+ llvm::Value *BufferPtr =
+ llvm::ConstantPointerNull::get(llvm::PointerType::getUnqual(Ctx));
+ return {BufferPtr, llvm::TypeSize::getFixed(0)};
} else {
llvm::SmallVector<llvm::Type *, 8> ArgTypes;
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
@@ -120,7 +121,7 @@ packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, const CallArgList &Args) {
Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType()));
}
llvm::Value *BufferPtr =
- Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
+ Builder.CreatePointerCast(Alloca, llvm::PointerType::getUnqual(Ctx));
return {BufferPtr, DL.getTypeAllocSize(AllocaTy)};
}
}
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index e9fa273f21cc..c239bc17ef26 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -223,56 +223,6 @@ void CGHLSLRuntime::addBufferResourceAnnotation(llvm::GlobalVariable *GV,
ResourceMD->addOperand(Res.getMetadata());
}
-static llvm::hlsl::ResourceKind
-castResourceShapeToResourceKind(HLSLResourceAttr::ResourceKind RK) {
- switch (RK) {
- case HLSLResourceAttr::ResourceKind::Texture1D:
- return llvm::hlsl::ResourceKind::Texture1D;
- case HLSLResourceAttr::ResourceKind::Texture2D:
- return llvm::hlsl::ResourceKind::Texture2D;
- case HLSLResourceAttr::ResourceKind::Texture2DMS:
- return llvm::hlsl::ResourceKind::Texture2DMS;
- case HLSLResourceAttr::ResourceKind::Texture3D:
- return llvm::hlsl::ResourceKind::Texture3D;
- case HLSLResourceAttr::ResourceKind::TextureCube:
- return llvm::hlsl::ResourceKind::TextureCube;
- case HLSLResourceAttr::ResourceKind::Texture1DArray:
- return llvm::hlsl::ResourceKind::Texture1DArray;
- case HLSLResourceAttr::ResourceKind::Texture2DArray:
- return llvm::hlsl::ResourceKind::Texture2DArray;
- case HLSLResourceAttr::ResourceKind::Texture2DMSArray:
- return llvm::hlsl::ResourceKind::Texture2DMSArray;
- case HLSLResourceAttr::ResourceKind::TextureCubeArray:
- return llvm::hlsl::ResourceKind::TextureCubeArray;
- case HLSLResourceAttr::ResourceKind::TypedBuffer:
- return llvm::hlsl::ResourceKind::TypedBuffer;
- case HLSLResourceAttr::ResourceKind::RawBuffer:
- return llvm::hlsl::ResourceKind::RawBuffer;
- case HLSLResourceAttr::ResourceKind::StructuredBuffer:
- return llvm::hlsl::ResourceKind::StructuredBuffer;
- case HLSLResourceAttr::ResourceKind::CBufferKind:
- return llvm::hlsl::ResourceKind::CBuffer;
- case HLSLResourceAttr::ResourceKind::SamplerKind:
- return llvm::hlsl::ResourceKind::Sampler;
- case HLSLResourceAttr::ResourceKind::TBuffer:
- return llvm::hlsl::ResourceKind::TBuffer;
- case HLSLResourceAttr::ResourceKind::RTAccelerationStructure:
- return llvm::hlsl::ResourceKind::RTAccelerationStructure;
- case HLSLResourceAttr::ResourceKind::FeedbackTexture2D:
- return llvm::hlsl::ResourceKind::FeedbackTexture2D;
- case HLSLResourceAttr::ResourceKind::FeedbackTexture2DArray:
- return llvm::hlsl::ResourceKind::FeedbackTexture2DArray;
- }
- // Make sure to update HLSLResourceAttr::ResourceKind when add new Kind to
- // hlsl::ResourceKind. Assume FeedbackTexture2DArray is the last enum for
- // HLSLResourceAttr::ResourceKind.
- static_assert(
- static_cast<uint32_t>(
- HLSLResourceAttr::ResourceKind::FeedbackTexture2DArray) ==
- (static_cast<uint32_t>(llvm::hlsl::ResourceKind::NumEntries) - 2));
- llvm_unreachable("all switch cases should be covered");
-}
-
void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) {
const Type *Ty = D->getType()->getPointeeOrArrayElementType();
if (!Ty)
@@ -284,15 +234,12 @@ void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) {
if (!Attr)
return;
- HLSLResourceAttr::ResourceClass RC = Attr->getResourceType();
- llvm::hlsl::ResourceKind RK =
- castResourceShapeToResourceKind(Attr->getResourceShape());
+ llvm::hlsl::ResourceClass RC = Attr->getResourceClass();
+ llvm::hlsl::ResourceKind RK = Attr->getResourceKind();
QualType QT(Ty, 0);
BufferResBinding Binding(D->getAttr<HLSLResourceBindingAttr>());
- addBufferResourceAnnotation(GV, QT.getAsString(),
- static_cast<llvm::hlsl::ResourceClass>(RC), RK,
- Binding);
+ addBufferResourceAnnotation(GV, QT.getAsString(), RC, RK, Binding);
}
CGHLSLRuntime::BufferResBinding::BufferResBinding(
diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp
index e5d9db273c2d..0d4800b90a2f 100644
--- a/clang/lib/CodeGen/CGLoopInfo.cpp
+++ b/clang/lib/CodeGen/CGLoopInfo.cpp
@@ -440,6 +440,14 @@ MDNode *LoopInfo::createMetadata(
Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup}));
}
+ // Setting clang::code_align attribute.
+ if (Attrs.CodeAlign > 0) {
+ Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.align"),
+ ConstantAsMetadata::get(ConstantInt::get(
+ llvm::Type::getInt32Ty(Ctx), Attrs.CodeAlign))};
+ LoopProperties.push_back(MDNode::get(Ctx, Vals));
+ }
+
LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(),
AdditionalLoopProperties.end());
return createFullUnrollMetadata(Attrs, LoopProperties, HasUserTransforms);
@@ -453,7 +461,7 @@ LoopAttributes::LoopAttributes(bool IsParallel)
VectorizeScalable(LoopAttributes::Unspecified), InterleaveCount(0),
UnrollCount(0), UnrollAndJamCount(0),
DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false),
- PipelineInitiationInterval(0), MustProgress(false) {}
+ PipelineInitiationInterval(0), CodeAlign(0), MustProgress(false) {}
void LoopAttributes::clear() {
IsParallel = false;
@@ -469,6 +477,7 @@ void LoopAttributes::clear() {
DistributeEnable = LoopAttributes::Unspecified;
PipelineDisabled = false;
PipelineInitiationInterval = 0;
+ CodeAlign = 0;
MustProgress = false;
}
@@ -493,8 +502,8 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
Attrs.VectorizeEnable == LoopAttributes::Unspecified &&
Attrs.UnrollEnable == LoopAttributes::Unspecified &&
Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified &&
- Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc &&
- !EndLoc && !Attrs.MustProgress)
+ Attrs.DistributeEnable == LoopAttributes::Unspecified &&
+ Attrs.CodeAlign == 0 && !StartLoc && !EndLoc && !Attrs.MustProgress)
return;
TempLoopID = MDNode::getTemporary(Header->getContext(), std::nullopt);
@@ -788,6 +797,15 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
}
}
+ // Identify loop attribute 'code_align' from Attrs.
+ // For attribute code_align:
+ // n - 'llvm.loop.align i32 n' metadata will be emitted.
+ if (const auto *CodeAlign = getSpecificAttr<const CodeAlignAttr>(Attrs)) {
+ const auto *CE = cast<ConstantExpr>(CodeAlign->getAlignment());
+ llvm::APSInt ArgVal = CE->getResultAsAPSInt();
+ setCodeAlign(ArgVal.getSExtValue());
+ }
+
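A minimal sketch of the attribute consumed here, using the C++ spelling:

    void bump(int *a, int n) {
      // Attaches "llvm.loop.align", i32 64 to this loop's metadata.
      [[clang::code_align(64)]]
      for (int i = 0; i < n; ++i)
        a[i] += 1;
    }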
setMustProgress(MustProgress);
if (CGOpts.OptimizationLevel > 0)
diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h
index 856e892f712e..a1c8c7e5307f 100644
--- a/clang/lib/CodeGen/CGLoopInfo.h
+++ b/clang/lib/CodeGen/CGLoopInfo.h
@@ -79,6 +79,9 @@ struct LoopAttributes {
/// Value for llvm.loop.pipeline.iicount metadata.
unsigned PipelineInitiationInterval;
+ /// Value for 'llvm.loop.align' metadata.
+ unsigned CodeAlign;
+
/// Value for whether the loop is required to make progress.
bool MustProgress;
};
@@ -282,6 +285,9 @@ public:
StagedAttrs.PipelineInitiationInterval = C;
}
+ /// Set value of code align for the next loop pushed.
+ void setCodeAlign(unsigned C) { StagedAttrs.CodeAlign = C; }
+
/// Set no progress for the next loop pushed.
void setMustProgress(bool P) { StagedAttrs.MustProgress = P; }
diff --git a/clang/lib/CodeGen/CGNonTrivialStruct.cpp b/clang/lib/CodeGen/CGNonTrivialStruct.cpp
index 3d2b1b8b2f78..75c1d7fbea84 100644
--- a/clang/lib/CodeGen/CGNonTrivialStruct.cpp
+++ b/clang/lib/CodeGen/CGNonTrivialStruct.cpp
@@ -313,7 +313,7 @@ static const CGFunctionInfo &getFunctionInfo(CodeGenModule &CGM,
for (unsigned I = 0; I < N; ++I)
Params.push_back(ImplicitParamDecl::Create(
Ctx, nullptr, SourceLocation(), &Ctx.Idents.get(ValNameStr[I]), ParamTy,
- ImplicitParamDecl::Other));
+ ImplicitParamKind::Other));
llvm::append_range(Args, Params);
@@ -367,8 +367,6 @@ template <class Derived> struct GenFuncBase {
CGF.Builder.CreateNUWMul(BaseEltSizeVal, NumElts);
llvm::Value *DstArrayEnd = CGF.Builder.CreateInBoundsGEP(
CGF.Int8Ty, DstAddr.getPointer(), SizeInBytes);
- DstArrayEnd = CGF.Builder.CreateBitCast(
- DstArrayEnd, CGF.CGM.Int8PtrPtrTy, "dstarray.end");
llvm::BasicBlock *PreheaderBB = CGF.Builder.GetInsertBlock();
// Create the header block and insert the phi instructions.
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 46c37eaea82b..acc85165a470 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -52,8 +52,7 @@ llvm::Value *CodeGenFunction::EmitObjCStringLiteral(const ObjCStringLiteral *E)
{
llvm::Constant *C =
CGM.getObjCRuntime().GenerateConstantString(E->getString()).getPointer();
- // FIXME: This bitcast should just be made an invariant on the Runtime.
- return llvm::ConstantExpr::getBitCast(C, ConvertType(E->getType()));
+ return C;
}
/// EmitObjCBoxedExpr - This routine generates code to call
@@ -149,9 +148,9 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
llvm::APInt APNumElements(Context.getTypeSize(Context.getSizeType()),
NumElements);
QualType ElementType = Context.getObjCIdType().withConst();
- QualType ElementArrayType
- = Context.getConstantArrayType(ElementType, APNumElements, nullptr,
- ArrayType::Normal, /*IndexTypeQuals=*/0);
+ QualType ElementArrayType = Context.getConstantArrayType(
+ ElementType, APNumElements, nullptr, ArraySizeModifier::Normal,
+ /*IndexTypeQuals=*/0);
// Allocate the temporary array(s).
Address Objects = CreateMemTemp(ElementArrayType, "objects");
@@ -222,6 +221,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
QualType ResultType = E->getType();
const ObjCObjectPointerType *InterfacePointerType
= ResultType->getAsObjCInterfacePointerType();
+ assert(InterfacePointerType && "Unexpected InterfacePointerType - null");
ObjCInterfaceDecl *Class
= InterfacePointerType->getObjectType()->getInterface();
CGObjCRuntime &Runtime = CGM.getObjCRuntime();
@@ -827,11 +827,8 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar,
// sizeof (Type of Ivar), isAtomic, false);
CallArgList args;
- llvm::Value *dest =
- CGF.Builder.CreateBitCast(CGF.ReturnValue.getPointer(), CGF.VoidPtrTy);
+ llvm::Value *dest = CGF.ReturnValue.getPointer();
args.add(RValue::get(dest), Context.VoidPtrTy);
-
- src = CGF.Builder.CreateBitCast(src, CGF.VoidPtrTy);
args.add(RValue::get(src), Context.VoidPtrTy);
CharUnits size = CGF.getContext().getTypeSizeInChars(ivar->getType());
@@ -1098,7 +1095,6 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF,
llvm::Value *ivarAddr =
CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0)
.getPointer(CGF);
- ivarAddr = CGF.Builder.CreateBitCast(ivarAddr, CGF.Int8PtrTy);
args.add(RValue::get(ivarAddr), CGF.getContext().VoidPtrTy);
// Third argument is the helper function.
@@ -1340,7 +1336,6 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD,
argVar->getType().getNonReferenceType(), VK_LValue,
SourceLocation());
llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(CGF);
- argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy);
args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy);
// The third argument is the sizeof the type.
@@ -1377,7 +1372,6 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF,
llvm::Value *ivarAddr =
CGF.EmitLValueForIvar(CGF.TypeOfSelfObject(), CGF.LoadObjCSelf(), ivar, 0)
.getPointer(CGF);
- ivarAddr = CGF.Builder.CreateBitCast(ivarAddr, CGF.Int8PtrTy);
args.add(RValue::get(ivarAddr), CGF.getContext().VoidPtrTy);
// The second argument is the address of the parameter variable.
@@ -1386,7 +1380,6 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF,
argVar->getType().getNonReferenceType(), VK_LValue,
SourceLocation());
llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(CGF);
- argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy);
args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy);
// Third argument is the helper function.
@@ -1800,10 +1793,9 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
Selector FastEnumSel =
CGM.getContext().Selectors.getSelector(std::size(II), &II[0]);
- QualType ItemsTy =
- getContext().getConstantArrayType(getContext().getObjCIdType(),
- llvm::APInt(32, NumItems), nullptr,
- ArrayType::Normal, 0);
+ QualType ItemsTy = getContext().getConstantArrayType(
+ getContext().getObjCIdType(), llvm::APInt(32, NumItems), nullptr,
+ ArraySizeModifier::Normal, 0);
Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr");
// Emit the collection pointer. In ARC, we do a retain.
@@ -3686,7 +3678,6 @@ void CodeGenFunction::EmitExtendGCLifetime(llvm::Value *object) {
/* constraints */ "r",
/* side effects */ true);
- object = Builder.CreateBitCast(object, VoidPtrTy);
EmitNounwindRuntimeCall(extender, object);
}
@@ -3710,7 +3701,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
CharUnits Alignment = C.getTypeAlignInChars(Ty);
llvm::Constant *Fn = getNonTrivialCStructMoveAssignmentOperator(
CGM, Alignment, Alignment, Ty.isVolatileQualified(), Ty);
- return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
+ return Fn;
}
if (!getLangOpts().CPlusPlus ||
@@ -3790,7 +3781,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
EmitStmt(TheCall);
FinishFunction();
- HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
+ HelperFn = Fn;
CGM.setAtomicSetterHelperFnMap(Ty, HelperFn);
return HelperFn;
}
@@ -3808,7 +3799,7 @@ llvm::Constant *CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
CharUnits Alignment = C.getTypeAlignInChars(Ty);
llvm::Constant *Fn = getNonTrivialCStructCopyConstructor(
CGM, Alignment, Alignment, Ty.isVolatileQualified(), Ty);
- return llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
+ return Fn;
}
if (!getLangOpts().CPlusPlus ||
@@ -3909,7 +3900,7 @@ llvm::Constant *CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap));
FinishFunction();
- HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
+ HelperFn = Fn;
CGM.setAtomicGetterHelperFnMap(Ty, HelperFn);
return HelperFn;
}
@@ -3953,7 +3944,7 @@ static unsigned getBaseMachOPlatformID(const llvm::Triple &TT) {
case llvm::Triple::DriverKit:
return llvm::MachO::PLATFORM_DRIVERKIT;
default:
- return /*Unknown platform*/ 0;
+ return llvm::MachO::PLATFORM_UNKNOWN;
}
}
diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp
index 09b6c3ac6adf..4ca1a8cce64d 100644
--- a/clang/lib/CodeGen/CGObjCGNU.cpp
+++ b/clang/lib/CodeGen/CGObjCGNU.cpp
@@ -1014,8 +1014,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
if (CGM.getTriple().isOSBinFormatCOFF()) {
cast<llvm::GlobalValue>(isa)->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
}
- } else if (isa->getType() != PtrToIdTy)
- isa = llvm::ConstantExpr::getBitCast(isa, PtrToIdTy);
+ }
// struct
// {
@@ -1108,10 +1107,9 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
std::pair<llvm::GlobalVariable*, int> v{ObjCStrGV, 0};
EarlyInitList.emplace_back(Sym, v);
}
- llvm::Constant *ObjCStr = llvm::ConstantExpr::getBitCast(ObjCStrGV, IdTy);
- ObjCStrings[Str] = ObjCStr;
- ConstantStrings.push_back(ObjCStr);
- return ConstantAddress(ObjCStr, IdElemTy, Align);
+ ObjCStrings[Str] = ObjCStrGV;
+ ConstantStrings.push_back(ObjCStrGV);
+ return ConstantAddress(ObjCStrGV, IdElemTy, Align);
}
void PushProperty(ConstantArrayBuilder &PropertiesArray,
@@ -1193,9 +1191,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
ReferencedProtocols.end());
SmallVector<llvm::Constant *, 16> Protocols;
for (const auto *PI : RuntimeProtocols)
- Protocols.push_back(
- llvm::ConstantExpr::getBitCast(GenerateProtocolRef(PI),
- ProtocolPtrTy));
+ Protocols.push_back(GenerateProtocolRef(PI));
return GenerateProtocolList(Protocols);
}
@@ -1305,7 +1301,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
llvm::GlobalValue::ExternalLinkage, nullptr, Name);
GV->setAlignment(CGM.getPointerAlign().getAsAlign());
}
- return llvm::ConstantExpr::getBitCast(GV, ProtocolPtrTy);
+ return GV;
}
/// Existing protocol references.
@@ -1322,9 +1318,9 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
std::string RefName = SymbolForProtocolRef(Name);
assert(!TheModule.getGlobalVariable(RefName));
// Emit a reference symbol.
- auto GV = new llvm::GlobalVariable(TheModule, ProtocolPtrTy,
- false, llvm::GlobalValue::LinkOnceODRLinkage,
- llvm::ConstantExpr::getBitCast(Protocol, ProtocolPtrTy), RefName);
+ auto GV = new llvm::GlobalVariable(TheModule, ProtocolPtrTy, false,
+ llvm::GlobalValue::LinkOnceODRLinkage,
+ Protocol, RefName);
GV->setComdat(TheModule.getOrInsertComdat(RefName));
GV->setSection(sectionName<ProtocolReferenceSection>());
GV->setAlignment(CGM.getPointerAlign().getAsAlign());
@@ -1381,9 +1377,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
auto RuntimeProtocols =
GetRuntimeProtocolList(PD->protocol_begin(), PD->protocol_end());
for (const auto *PI : RuntimeProtocols)
- Protocols.push_back(
- llvm::ConstantExpr::getBitCast(GenerateProtocolRef(PI),
- ProtocolPtrTy));
+ Protocols.push_back(GenerateProtocolRef(PI));
llvm::Constant *ProtocolList = GenerateProtocolList(Protocols);
// Collect information about methods
@@ -1420,19 +1414,13 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
GV->setSection(sectionName<ProtocolSection>());
GV->setComdat(TheModule.getOrInsertComdat(SymName));
if (OldGV) {
- OldGV->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GV,
- OldGV->getType()));
+ OldGV->replaceAllUsesWith(GV);
OldGV->removeFromParent();
GV->setName(SymName);
}
Protocol = GV;
return GV;
}
- llvm::Constant *EnforceType(llvm::Constant *Val, llvm::Type *Ty) {
- if (Val->getType() == Ty)
- return Val;
- return llvm::ConstantExpr::getBitCast(Val, Ty);
- }
llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
const std::string &TypeEncoding) override {
return GetConstantSelector(Sel, TypeEncoding);
@@ -1469,7 +1457,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
auto SelVarName = (StringRef(".objc_selector_") + Sel.getAsString() + "_" +
MangledTypes).str();
if (auto *GV = TheModule.getNamedGlobal(SelVarName))
- return EnforceType(GV, SelectorTy);
+ return GV;
ConstantInitBuilder builder(CGM);
auto SelBuilder = builder.beginStruct();
SelBuilder.add(ExportUniqueString(Sel.getAsString(), ".objc_sel_name_",
@@ -1480,8 +1468,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
GV->setComdat(TheModule.getOrInsertComdat(SelVarName));
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
GV->setSection(sectionName<SelectorSection>());
- auto *SelVal = EnforceType(GV, SelectorTy);
- return SelVal;
+ return GV;
}
llvm::StructType *emptyStruct = nullptr;
@@ -1738,9 +1725,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
SmallVector<ObjCMethodDecl*, 16> ClassMethods;
ClassMethods.insert(ClassMethods.begin(), OID->classmeth_begin(),
OID->classmeth_end());
- metaclassFields.addBitCast(
- GenerateMethodList(className, "", ClassMethods, true),
- PtrTy);
+ metaclassFields.add(
+ GenerateMethodList(className, "", ClassMethods, true));
}
// void *dtable;
metaclassFields.addNullPointer(PtrTy);
@@ -1791,7 +1777,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
}
}
if (!IsCOFF)
- classFields.add(llvm::ConstantExpr::getBitCast(SuperClass, PtrTy));
+ classFields.add(SuperClass);
else
classFields.addNullPointer(PtrTy);
} else
@@ -1907,9 +1893,9 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
if (InstanceMethods.size() == 0)
classFields.addNullPointer(PtrTy);
else
- classFields.addBitCast(
- GenerateMethodList(className, "", InstanceMethods, false),
- PtrTy);
+ classFields.add(
+ GenerateMethodList(className, "", InstanceMethods, false));
+
// void *dtable;
classFields.addNullPointer(PtrTy);
// IMP cxx_construct;
@@ -1925,9 +1911,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
classDecl->protocol_end());
SmallVector<llvm::Constant *, 16> Protocols;
for (const auto *I : RuntimeProtocols)
- Protocols.push_back(
- llvm::ConstantExpr::getBitCast(GenerateProtocolRef(I),
- ProtocolPtrTy));
+ Protocols.push_back(GenerateProtocolRef(I));
+
if (Protocols.empty())
classFields.addNullPointer(PtrTy);
else
@@ -1945,7 +1930,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
auto *classRefSymbol = GetClassVar(className);
classRefSymbol->setSection(sectionName<ClassReferenceSection>());
- classRefSymbol->setInitializer(llvm::ConstantExpr::getBitCast(classStruct, IdTy));
+ classRefSymbol->setInitializer(classStruct);
if (IsCOFF) {
// we can't import a class struct.
@@ -1966,22 +1951,19 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
// Resolve the class aliases, if they exist.
// FIXME: Class pointer aliases shouldn't exist!
if (ClassPtrAlias) {
- ClassPtrAlias->replaceAllUsesWith(
- llvm::ConstantExpr::getBitCast(classStruct, IdTy));
+ ClassPtrAlias->replaceAllUsesWith(classStruct);
ClassPtrAlias->eraseFromParent();
ClassPtrAlias = nullptr;
}
if (auto Placeholder =
TheModule.getNamedGlobal(SymbolForClass(className)))
if (Placeholder != classStruct) {
- Placeholder->replaceAllUsesWith(
- llvm::ConstantExpr::getBitCast(classStruct, Placeholder->getType()));
+ Placeholder->replaceAllUsesWith(classStruct);
Placeholder->eraseFromParent();
classStruct->setName(SymbolForClass(className));
}
if (MetaClassPtrAlias) {
- MetaClassPtrAlias->replaceAllUsesWith(
- llvm::ConstantExpr::getBitCast(metaclass, IdTy));
+ MetaClassPtrAlias->replaceAllUsesWith(metaclass);
MetaClassPtrAlias->eraseFromParent();
MetaClassPtrAlias = nullptr;
}
@@ -2422,7 +2404,7 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) {
false,
llvm::GlobalValue::ExternalLinkage,
nullptr, "__objc_id_type_info");
- return llvm::ConstantExpr::getBitCast(IDEHType, PtrToInt8Ty);
+ return IDEHType;
}
const ObjCObjectPointerType *PT =
@@ -2436,9 +2418,8 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) {
std::string typeinfoName = "__objc_eh_typeinfo_" + className;
// Return the existing typeinfo if it exists
- llvm::Constant *typeinfo = TheModule.getGlobalVariable(typeinfoName);
- if (typeinfo)
- return llvm::ConstantExpr::getBitCast(typeinfo, PtrToInt8Ty);
+ if (llvm::Constant *typeinfo = TheModule.getGlobalVariable(typeinfoName))
+ return typeinfo;
// Otherwise create it.
@@ -2453,9 +2434,8 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) {
nullptr, vtableName);
}
llvm::Constant *Two = llvm::ConstantInt::get(IntTy, 2);
- auto *BVtable = llvm::ConstantExpr::getBitCast(
- llvm::ConstantExpr::getGetElementPtr(Vtable->getValueType(), Vtable, Two),
- PtrToInt8Ty);
+ auto *BVtable =
+ llvm::ConstantExpr::getGetElementPtr(Vtable->getValueType(), Vtable, Two);
llvm::Constant *typeName =
ExportUniqueString(className, "__objc_eh_typename_");
@@ -2469,7 +2449,7 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) {
CGM.getPointerAlign(),
/*constant*/ false,
llvm::GlobalValue::LinkOnceODRLinkage);
- return llvm::ConstantExpr::getBitCast(TI, PtrToInt8Ty);
+ return TI;
}
/// Generate an NSConstantString object.
@@ -2493,19 +2473,16 @@ ConstantAddress CGObjCGNU::GenerateConstantString(const StringLiteral *SL) {
llvm::Constant *isa = TheModule.getNamedGlobal(Sym);
if (!isa)
- isa = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */false,
- llvm::GlobalValue::ExternalWeakLinkage, nullptr, Sym);
- else if (isa->getType() != PtrToIdTy)
- isa = llvm::ConstantExpr::getBitCast(isa, PtrToIdTy);
+ isa = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */ false,
+ llvm::GlobalValue::ExternalWeakLinkage,
+ nullptr, Sym);
ConstantInitBuilder Builder(CGM);
auto Fields = Builder.beginStruct();
Fields.add(isa);
Fields.add(MakeConstantString(Str));
Fields.addInt(IntTy, Str.size());
- llvm::Constant *ObjCStr =
- Fields.finishAndCreateGlobal(".objc_str", Align);
- ObjCStr = llvm::ConstantExpr::getBitCast(ObjCStr, PtrToInt8Ty);
+ llvm::Constant *ObjCStr = Fields.finishAndCreateGlobal(".objc_str", Align);
ObjCStrings[Str] = ObjCStr;
ConstantStrings.push_back(ObjCStr);
return ConstantAddress(ObjCStr, Int8Ty, Align);
@@ -2909,14 +2886,14 @@ GenerateMethodList(StringRef ClassName,
assert(FnPtr && "Can't generate metadata for method that doesn't exist");
auto Method = MethodArray.beginStruct(ObjCMethodTy);
if (isV2ABI) {
- Method.addBitCast(FnPtr, IMPTy);
+ Method.add(FnPtr);
Method.add(GetConstantSelector(OMD->getSelector(),
Context.getObjCEncodingForMethodDecl(OMD)));
Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(OMD, true)));
} else {
Method.add(MakeConstantString(OMD->getSelector().getAsString()));
Method.add(MakeConstantString(Context.getObjCEncodingForMethodDecl(OMD)));
- Method.addBitCast(FnPtr, IMPTy);
+ Method.add(FnPtr);
}
Method.finishAndAddTo(MethodArray);
}
@@ -3015,7 +2992,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure(
// Fill in the structure
// isa
- Elements.addBitCast(MetaClass, PtrToInt8Ty);
+ Elements.add(MetaClass);
// super_class
Elements.add(SuperClass);
// name
@@ -3044,7 +3021,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure(
// sibling_class
Elements.add(NULLPtr);
// protocols
- Elements.addBitCast(Protocols, PtrTy);
+ Elements.add(Protocols);
// gc_object_type
Elements.add(NULLPtr);
// abi_version
@@ -3068,8 +3045,7 @@ llvm::Constant *CGObjCGNU::GenerateClassStructure(
Elements.finishAndCreateGlobal(ClassSym, CGM.getPointerAlign(), false,
llvm::GlobalValue::ExternalLinkage);
if (ClassRef) {
- ClassRef->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(Class,
- ClassRef->getType()));
+ ClassRef->replaceAllUsesWith(Class);
ClassRef->removeFromParent();
Class->setName(ClassSym);
}
@@ -3117,7 +3093,7 @@ CGObjCGNU::GenerateProtocolList(ArrayRef<std::string> Protocols) {
} else {
protocol = value->getValue();
}
- Elements.addBitCast(protocol, PtrToInt8Ty);
+ Elements.add(protocol);
}
Elements.finishAndAddTo(ProtocolList);
return ProtocolList.finishAndCreateGlobal(".objc_protocol_list",
@@ -3144,7 +3120,6 @@ llvm::Constant *
CGObjCGNU::GenerateEmptyProtocol(StringRef ProtocolName) {
llvm::Constant *ProtocolList = GenerateProtocolList({});
llvm::Constant *MethodList = GenerateProtocolMethodList({});
- MethodList = llvm::ConstantExpr::getBitCast(MethodList, PtrToInt8Ty);
// Protocols are objects containing lists of the methods implemented and
// protocols adopted.
ConstantInitBuilder Builder(CGM);
@@ -3235,9 +3210,7 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) {
Elements.add(PropertyList);
Elements.add(OptionalPropertyList);
ExistingProtocols[ProtocolName] =
- llvm::ConstantExpr::getBitCast(
- Elements.finishAndCreateGlobal(".objc_protocol", CGM.getPointerAlign()),
- IdTy);
+ Elements.finishAndCreateGlobal(".objc_protocol", CGM.getPointerAlign());
}
void CGObjCGNU::GenerateProtocolHolderCategory() {
// Collect information about instance methods
@@ -3250,11 +3223,9 @@ void CGObjCGNU::GenerateProtocolHolderCategory() {
Elements.add(MakeConstantString(CategoryName));
Elements.add(MakeConstantString(ClassName));
// Instance method list
- Elements.addBitCast(GenerateMethodList(
- ClassName, CategoryName, {}, false), PtrTy);
+ Elements.add(GenerateMethodList(ClassName, CategoryName, {}, false));
// Class method list
- Elements.addBitCast(GenerateMethodList(
- ClassName, CategoryName, {}, true), PtrTy);
+ Elements.add(GenerateMethodList(ClassName, CategoryName, {}, true));
// Protocol list
ConstantInitBuilder ProtocolListBuilder(CGM);
@@ -3264,16 +3235,13 @@ void CGObjCGNU::GenerateProtocolHolderCategory() {
auto ProtocolElements = ProtocolList.beginArray(PtrTy);
for (auto iter = ExistingProtocols.begin(), endIter = ExistingProtocols.end();
iter != endIter ; iter++) {
- ProtocolElements.addBitCast(iter->getValue(), PtrTy);
+ ProtocolElements.add(iter->getValue());
}
ProtocolElements.finishAndAddTo(ProtocolList);
- Elements.addBitCast(
- ProtocolList.finishAndCreateGlobal(".objc_protocol_list",
- CGM.getPointerAlign()),
- PtrTy);
- Categories.push_back(llvm::ConstantExpr::getBitCast(
- Elements.finishAndCreateGlobal("", CGM.getPointerAlign()),
- PtrTy));
+ Elements.add(ProtocolList.finishAndCreateGlobal(".objc_protocol_list",
+ CGM.getPointerAlign()));
+ Categories.push_back(
+ Elements.finishAndCreateGlobal("", CGM.getPointerAlign()));
}
/// Libobjc2 uses a bitfield representation where small(ish) bitfields are
@@ -3348,38 +3316,35 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
SmallVector<ObjCMethodDecl*, 16> InstanceMethods;
InstanceMethods.insert(InstanceMethods.begin(), OCD->instmeth_begin(),
OCD->instmeth_end());
- Elements.addBitCast(
- GenerateMethodList(ClassName, CategoryName, InstanceMethods, false),
- PtrTy);
+ Elements.add(
+ GenerateMethodList(ClassName, CategoryName, InstanceMethods, false));
+
// Class method list
SmallVector<ObjCMethodDecl*, 16> ClassMethods;
ClassMethods.insert(ClassMethods.begin(), OCD->classmeth_begin(),
OCD->classmeth_end());
- Elements.addBitCast(
- GenerateMethodList(ClassName, CategoryName, ClassMethods, true),
- PtrTy);
+ Elements.add(GenerateMethodList(ClassName, CategoryName, ClassMethods, true));
+
// Protocol list
- Elements.addBitCast(GenerateCategoryProtocolList(CatDecl), PtrTy);
+ Elements.add(GenerateCategoryProtocolList(CatDecl));
if (isRuntime(ObjCRuntime::GNUstep, 2)) {
const ObjCCategoryDecl *Category =
Class->FindCategoryDeclaration(OCD->getIdentifier());
if (Category) {
// Instance properties
- Elements.addBitCast(GeneratePropertyList(OCD, Category, false), PtrTy);
+ Elements.add(GeneratePropertyList(OCD, Category, false));
// Class properties
- Elements.addBitCast(GeneratePropertyList(OCD, Category, true), PtrTy);
+ Elements.add(GeneratePropertyList(OCD, Category, true));
} else {
Elements.addNullPointer(PtrTy);
Elements.addNullPointer(PtrTy);
}
}
- Categories.push_back(llvm::ConstantExpr::getBitCast(
- Elements.finishAndCreateGlobal(
- std::string(".objc_category_")+ClassName+CategoryName,
- CGM.getPointerAlign()),
- PtrTy));
+ Categories.push_back(Elements.finishAndCreateGlobal(
+ std::string(".objc_category_") + ClassName + CategoryName,
+ CGM.getPointerAlign()));
}
llvm::Constant *CGObjCGNU::GeneratePropertyList(const Decl *Container,
@@ -3682,20 +3647,17 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
// Resolve the class aliases, if they exist.
if (ClassPtrAlias) {
- ClassPtrAlias->replaceAllUsesWith(
- llvm::ConstantExpr::getBitCast(ClassStruct, IdTy));
+ ClassPtrAlias->replaceAllUsesWith(ClassStruct);
ClassPtrAlias->eraseFromParent();
ClassPtrAlias = nullptr;
}
if (MetaClassPtrAlias) {
- MetaClassPtrAlias->replaceAllUsesWith(
- llvm::ConstantExpr::getBitCast(MetaClassStruct, IdTy));
+ MetaClassPtrAlias->replaceAllUsesWith(MetaClassStruct);
MetaClassPtrAlias->eraseFromParent();
MetaClassPtrAlias = nullptr;
}
// Add class structure to list to be added to the symtab later
- ClassStruct = llvm::ConstantExpr::getBitCast(ClassStruct, PtrToInt8Ty);
Classes.push_back(ClassStruct);
}
@@ -3709,11 +3671,9 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() {
GenerateProtocolHolderCategory();
llvm::StructType *selStructTy = dyn_cast<llvm::StructType>(SelectorElemTy);
- llvm::Type *selStructPtrTy = SelectorTy;
if (!selStructTy) {
selStructTy = llvm::StructType::get(CGM.getLLVMContext(),
{ PtrToInt8Ty, PtrToInt8Ty });
- selStructPtrTy = llvm::PointerType::getUnqual(selStructTy);
}
// Generate statics list:
@@ -3744,7 +3704,6 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() {
statics = allStaticsArray.finishAndCreateGlobal(".objc_statics_ptr",
CGM.getPointerAlign());
- statics = llvm::ConstantExpr::getBitCast(statics, PtrTy);
}
// Array of classes, categories, and constant objects.
@@ -3807,9 +3766,6 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() {
// FIXME: We're generating redundant loads and stores here!
llvm::Constant *selPtr = llvm::ConstantExpr::getGetElementPtr(
selectorList->getValueType(), selectorList, idxs);
- // If selectors are defined as an opaque type, cast the pointer to this
- // type.
- selPtr = llvm::ConstantExpr::getBitCast(selPtr, SelectorTy);
selectorAliases[i]->replaceAllUsesWith(selPtr);
selectorAliases[i]->eraseFromParent();
}
@@ -3821,7 +3777,7 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() {
// Number of static selectors
symtab.addInt(LongTy, selectorCount);
- symtab.addBitCast(selectorList, selStructPtrTy);
+ symtab.add(selectorList);
// Number of classes defined.
symtab.addInt(CGM.Int16Ty, Classes.size());
@@ -3930,7 +3886,6 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() {
llvm::Constant *TheClass =
TheModule.getGlobalVariable("_OBJC_CLASS_" + iter->first, true);
if (TheClass) {
- TheClass = llvm::ConstantExpr::getBitCast(TheClass, PtrTy);
Builder.CreateCall(RegisterAlias,
{TheClass, MakeConstantString(iter->second)});
}
@@ -4123,9 +4078,9 @@ llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable(
// when linked against code which isn't (most of the time).
llvm::GlobalVariable *IvarOffsetPointer = TheModule.getNamedGlobal(Name);
if (!IvarOffsetPointer)
- IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
- llvm::Type::getInt32PtrTy(VMContext), false,
- llvm::GlobalValue::ExternalLinkage, nullptr, Name);
+ IvarOffsetPointer = new llvm::GlobalVariable(
+ TheModule, llvm::PointerType::getUnqual(VMContext), false,
+ llvm::GlobalValue::ExternalLinkage, nullptr, Name);
return IvarOffsetPointer;
}
@@ -4169,10 +4124,11 @@ llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF,
CGF.CGM.getTarget().getTriple().isKnownWindowsMSVCEnvironment())
return CGF.Builder.CreateZExtOrBitCast(
CGF.Builder.CreateAlignedLoad(
- Int32Ty, CGF.Builder.CreateAlignedLoad(
- llvm::Type::getInt32PtrTy(VMContext),
- ObjCIvarOffsetVariable(Interface, Ivar),
- CGF.getPointerAlign(), "ivar"),
+ Int32Ty,
+ CGF.Builder.CreateAlignedLoad(
+ llvm::PointerType::getUnqual(VMContext),
+ ObjCIvarOffsetVariable(Interface, Ivar),
+ CGF.getPointerAlign(), "ivar"),
CharUnits::fromQuantity(4)),
PtrDiffTy);
std::string name = "__objc_ivar_offset_value_" +
diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp
index 32f4f411347a..ba52b23be018 100644
--- a/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/clang/lib/CodeGen/CGObjCMac.cpp
@@ -1713,8 +1713,8 @@ public:
/// A helper class for performing the null-initialization of a return
/// value.
struct NullReturnState {
- llvm::BasicBlock *NullBB;
- NullReturnState() : NullBB(nullptr) {}
+ llvm::BasicBlock *NullBB = nullptr;
+ NullReturnState() = default;
/// Perform a null-check of the given receiver.
void init(CodeGenFunction &CGF, llvm::Value *receiver) {
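
The NullReturnState hunk above is an unrelated C++11 cleanup: a constructor whose only job was to zero a member becomes a default member initializer. The two forms below are equivalent; the second keeps the initial value next to the declaration (generic illustration, not the clang types):

struct Before {
  int *P;
  Before() : P(nullptr) {} // init hidden in the constructor
};

struct After {
  int *P = nullptr;  // init visible at the declaration
  After() = default; // implicitly generated ctor picks it up
};
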
@@ -1958,9 +1958,8 @@ llvm::Constant *CGObjCMac::getNSConstantStringClassRef() {
llvm::Type *PTy = llvm::ArrayType::get(CGM.IntTy, 0);
auto GV = CGM.CreateRuntimeVariable(PTy, str);
- auto V = llvm::ConstantExpr::getBitCast(GV, CGM.IntTy->getPointerTo());
- ConstantStringClassRef = V;
- return V;
+ ConstantStringClassRef = GV;
+ return GV;
}
llvm::Constant *CGObjCNonFragileABIMac::getNSConstantStringClassRef() {
@@ -1972,12 +1971,8 @@ llvm::Constant *CGObjCNonFragileABIMac::getNSConstantStringClassRef() {
StringClass.empty() ? "OBJC_CLASS_$_NSConstantString"
: "OBJC_CLASS_$_" + StringClass;
llvm::Constant *GV = GetClassGlobal(str, NotForDefinition);
-
- // Make sure the result is of the correct type.
- auto V = llvm::ConstantExpr::getBitCast(GV, CGM.IntTy->getPointerTo());
-
- ConstantStringClassRef = V;
- return V;
+ ConstantStringClassRef = GV;
+ return GV;
}
ConstantAddress
@@ -1996,11 +1991,8 @@ CGObjCCommonMac::GenerateConstantNSString(const StringLiteral *Literal) {
// If we don't already have it, construct the type for a constant NSString.
if (!NSConstantStringType) {
NSConstantStringType =
- llvm::StructType::create({
- CGM.Int32Ty->getPointerTo(),
- CGM.Int8PtrTy,
- CGM.IntTy
- }, "struct.__builtin_NSString");
+ llvm::StructType::create({CGM.UnqualPtrTy, CGM.Int8PtrTy, CGM.IntTy},
+ "struct.__builtin_NSString");
}
ConstantInitBuilder Builder(CGM);
@@ -2022,7 +2014,7 @@ CGObjCCommonMac::GenerateConstantNSString(const StringLiteral *Literal) {
// Don't enforce the target's minimum global alignment, since the only use
// of the string is via this class initializer.
GV->setAlignment(llvm::Align(1));
- Fields.addBitCast(GV, CGM.Int8PtrTy);
+ Fields.add(GV);
// String length.
Fields.addInt(CGM.IntTy, StringLength);
@@ -2969,8 +2961,7 @@ llvm::Value *CGObjCMac::GenerateProtocolRef(CodeGenFunction &CGF,
// resolved. Investigate. It's also wasteful to look this up over and over.
LazySymbols.insert(&CGM.getContext().Idents.get("Protocol"));
- return llvm::ConstantExpr::getBitCast(GetProtocolRef(PD),
- ObjCTypes.getExternalProtocolPtrTy());
+ return GetProtocolRef(PD);
}
void CGObjCCommonMac::GenerateProtocol(const ObjCProtocolDecl *PD) {
@@ -3190,7 +3181,7 @@ CGObjCMac::EmitProtocolList(Twine name,
llvm::GlobalVariable *GV =
CreateMetadataVar(name, values, section, CGM.getPointerAlign(), false);
- return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.ProtocolListPtrTy);
+ return GV;
}
static void
@@ -3298,7 +3289,7 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name,
llvm::GlobalVariable *GV =
CreateMetadataVar(Name, values, Section, CGM.getPointerAlign(), true);
- return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.PropertyListPtrTy);
+ return GV;
}
llvm::Constant *
@@ -3319,7 +3310,7 @@ CGObjCCommonMac::EmitProtocolMethodTypes(Twine Name,
llvm::GlobalVariable *GV =
CreateMetadataVar(Name, Init, Section, CGM.getPointerAlign(), true);
- return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.Int8PtrPtrTy);
+ return GV;
}
/*
@@ -3329,7 +3320,7 @@ CGObjCCommonMac::EmitProtocolMethodTypes(Twine Name,
struct _objc_method_list *instance_methods;
struct _objc_method_list *class_methods;
struct _objc_protocol_list *protocols;
- uint32_t size; // <rdar://4585769>
+ uint32_t size; // sizeof(struct _objc_category)
struct _objc_property_list *instance_properties;
struct _objc_property_list *class_properties;
};
@@ -3566,8 +3557,7 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) {
// Record a reference to the super class.
LazySymbols.insert(Super->getIdentifier());
- values.addBitCast(GetClassName(Super->getObjCRuntimeNameAsString()),
- ObjCTypes.ClassPtrTy);
+ values.add(GetClassName(Super->getObjCRuntimeNameAsString()));
} else {
values.addNullPointer(ObjCTypes.ClassPtrTy);
}
@@ -3621,14 +3611,12 @@ llvm::Constant *CGObjCMac::EmitMetaClass(const ObjCImplementationDecl *ID,
const ObjCInterfaceDecl *Root = ID->getClassInterface();
while (const ObjCInterfaceDecl *Super = Root->getSuperClass())
Root = Super;
- values.addBitCast(GetClassName(Root->getObjCRuntimeNameAsString()),
- ObjCTypes.ClassPtrTy);
+ values.add(GetClassName(Root->getObjCRuntimeNameAsString()));
// The super class for the metaclass is emitted as the name of the
// super class. The runtime fixes this up to point to the
// *metaclass* for the super class.
if (ObjCInterfaceDecl *Super = ID->getClassInterface()->getSuperClass()) {
- values.addBitCast(GetClassName(Super->getObjCRuntimeNameAsString()),
- ObjCTypes.ClassPtrTy);
+ values.add(GetClassName(Super->getObjCRuntimeNameAsString()));
} else {
values.addNullPointer(ObjCTypes.ClassPtrTy);
}
@@ -3812,7 +3800,7 @@ llvm::Constant *CGObjCMac::EmitIvarList(const ObjCImplementationDecl *ID,
GV = CreateMetadataVar("OBJC_INSTANCE_VARIABLES_" + ID->getName(), ivarList,
"__OBJC,__instance_vars,regular,no_dead_strip",
CGM.getPointerAlign(), true);
- return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.IvarListPtrTy);
+ return GV;
}
/// Build a struct objc_method_description constant for the given method.
@@ -3824,8 +3812,7 @@ llvm::Constant *CGObjCMac::EmitIvarList(const ObjCImplementationDecl *ID,
void CGObjCMac::emitMethodDescriptionConstant(ConstantArrayBuilder &builder,
const ObjCMethodDecl *MD) {
auto description = builder.beginStruct(ObjCTypes.MethodDescriptionTy);
- description.addBitCast(GetMethodVarName(MD->getSelector()),
- ObjCTypes.SelectorPtrTy);
+ description.add(GetMethodVarName(MD->getSelector()));
description.add(GetMethodVarType(MD));
description.finishAndAddTo(builder);
}
@@ -3843,10 +3830,9 @@ void CGObjCMac::emitMethodConstant(ConstantArrayBuilder &builder,
assert(fn && "no definition registered for method");
auto method = builder.beginStruct(ObjCTypes.MethodTy);
- method.addBitCast(GetMethodVarName(MD->getSelector()),
- ObjCTypes.SelectorPtrTy);
+ method.add(GetMethodVarName(MD->getSelector()));
method.add(GetMethodVarType(MD));
- method.addBitCast(fn, ObjCTypes.Int8PtrTy);
+ method.add(fn);
method.finishAndAddTo(builder);
}
@@ -3931,8 +3917,7 @@ llvm::Constant *CGObjCMac::emitMethodList(Twine name, MethodListType MLT,
llvm::GlobalVariable *GV = CreateMetadataVar(prefix + name, values, section,
CGM.getPointerAlign(), true);
- return llvm::ConstantExpr::getBitCast(GV,
- ObjCTypes.MethodDescriptionListPtrTy);
+ return GV;
}
// Otherwise, it's an objc_method_list.
@@ -3949,7 +3934,7 @@ llvm::Constant *CGObjCMac::emitMethodList(Twine name, MethodListType MLT,
llvm::GlobalVariable *GV = CreateMetadataVar(prefix + name, values, section,
CGM.getPointerAlign(), true);
- return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.MethodListPtrTy);
+ return GV;
}
llvm::Function *CGObjCCommonMac::GenerateMethod(const ObjCMethodDecl *OMD,
@@ -4004,8 +3989,7 @@ CGObjCCommonMac::GenerateDirectMethod(const ObjCMethodDecl *OMD,
Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage,
"", &CGM.getModule());
Fn->takeName(OldFn);
- OldFn->replaceAllUsesWith(
- llvm::ConstantExpr::getBitCast(Fn, OldFn->getType()));
+ OldFn->replaceAllUsesWith(Fn);
OldFn->eraseFromParent();
// Replace the cached function in the map.
@@ -4486,14 +4470,10 @@ llvm::FunctionType *FragileHazards::GetAsmFnType() {
want to implement correct ObjC/C++ exception interactions for the
fragile ABI.
- Note that for this use of setjmp/longjmp to be correct, we may need
- to mark some local variables volatile: if a non-volatile local
- variable is modified between the setjmp and the longjmp, it has
- indeterminate value. For the purposes of LLVM IR, it may be
- sufficient to make loads and stores within the @try (to variables
- declared outside the @try) volatile. This is necessary for
- optimized correctness, but is not currently being done; this is
- being tracked as rdar://problem/8160285
+ Note that for this use of setjmp/longjmp to be correct under optimization,
+ we use inline assembly on the set of local variables: it forces locals to
+ memory immediately before any protected call and keeps them from being
+ cached in registers across the setjmp->catch edge (see the sketch below).
The basic framework for a @try-catch-finally is as follows:
{
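
A minimal sketch of the flushing trick described above, assuming a hypothetical protected_call() that may longjmp back to Buf; the real FragileHazards machinery enumerates every local in the frame rather than a single variable:

#include <setjmp.h>

static jmp_buf Buf;
void protected_call(void); // hypothetical; may longjmp(Buf, 1)

int example(void) {
  int Local = 1;
  if (setjmp(Buf) == 0) {
    // Publishing &Local to the asm plus the "memory" clobber forces Local
    // out of registers before the call and forbids caching it across it.
    asm volatile("" : : "r"(&Local) : "memory");
    protected_call();
  }
  return Local; // reloaded from memory, not a stale register copy
}
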
@@ -5092,7 +5072,8 @@ enum ImageInfoFlags {
eImageInfo_OptimizedByDyld = (1 << 3), // This flag is set by the dyld shared cache.
// A flag indicating that the module has no instances of a @synthesize of a
- // superclass variable. <rdar://problem/6803242>
+ // superclass variable. This flag used to be consumed by the runtime to work
+ // around a miscompile in gcc.
eImageInfo_CorrectedSynthesize = (1 << 4), // This flag is no longer set by clang.
eImageInfo_ImageIsSimulated = (1 << 5),
eImageInfo_ClassProperties = (1 << 6)
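
For orientation: these values are OR'd together into the flags field (the second 32-bit word) of the Objective-C image-info structure the compiler emits, e.g. (fragment using the enum above):

unsigned Flags = eImageInfo_ImageIsSimulated | eImageInfo_ClassProperties;
// == (1 << 5) | (1 << 6) == 0x60
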
@@ -5205,17 +5186,17 @@ llvm::Constant *CGObjCMac::EmitModuleSymbols() {
if (ID->isWeakImported() && !IMP->isWeakImported())
DefinedClasses[i]->setLinkage(llvm::GlobalVariable::ExternalLinkage);
- array.addBitCast(DefinedClasses[i], ObjCTypes.Int8PtrTy);
+ array.add(DefinedClasses[i]);
}
for (unsigned i=0; i<NumCategories; i++)
- array.addBitCast(DefinedCategories[i], ObjCTypes.Int8PtrTy);
+ array.add(DefinedCategories[i]);
array.finishAndAddTo(values);
llvm::GlobalVariable *GV = CreateMetadataVar(
"OBJC_SYMBOLS", values, "__OBJC,__symbols,regular,no_dead_strip",
CGM.getPointerAlign(), true);
- return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.SymtabPtrTy);
+ return GV;
}
llvm::Value *CGObjCMac::EmitClassRefFromId(CodeGenFunction &CGF,
@@ -5225,13 +5206,10 @@ llvm::Value *CGObjCMac::EmitClassRefFromId(CodeGenFunction &CGF,
llvm::GlobalVariable *&Entry = ClassReferences[II];
if (!Entry) {
- llvm::Constant *Casted =
- llvm::ConstantExpr::getBitCast(GetClassName(II->getName()),
- ObjCTypes.ClassPtrTy);
- Entry = CreateMetadataVar(
- "OBJC_CLASS_REFERENCES_", Casted,
- "__OBJC,__cls_refs,literal_pointers,no_dead_strip",
- CGM.getPointerAlign(), true);
+ Entry =
+ CreateMetadataVar("OBJC_CLASS_REFERENCES_", GetClassName(II->getName()),
+ "__OBJC,__cls_refs,literal_pointers,no_dead_strip",
+ CGM.getPointerAlign(), true);
}
return CGF.Builder.CreateAlignedLoad(Entry->getValueType(), Entry,
@@ -5264,11 +5242,8 @@ Address CGObjCMac::EmitSelectorAddr(Selector Sel) {
llvm::GlobalVariable *&Entry = SelectorReferences[Sel];
if (!Entry) {
- llvm::Constant *Casted =
- llvm::ConstantExpr::getBitCast(GetMethodVarName(Sel),
- ObjCTypes.SelectorPtrTy);
Entry = CreateMetadataVar(
- "OBJC_SELECTOR_REFERENCES_", Casted,
+ "OBJC_SELECTOR_REFERENCES_", GetMethodVarName(Sel),
"__OBJC,__message_refs,literal_pointers,no_dead_strip", Align, true);
Entry->setExternallyInitialized(true);
}
@@ -5768,10 +5743,9 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm)
// id self;
// Class cls;
// }
- RecordDecl *RD = RecordDecl::Create(Ctx, TTK_Struct,
- Ctx.getTranslationUnitDecl(),
- SourceLocation(), SourceLocation(),
- &Ctx.Idents.get("_objc_super"));
+ RecordDecl *RD = RecordDecl::Create(
+ Ctx, TagTypeKind::Struct, Ctx.getTranslationUnitDecl(), SourceLocation(),
+ SourceLocation(), &Ctx.Idents.get("_objc_super"));
RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(),
nullptr, Ctx.getObjCIdType(), nullptr, nullptr,
false, ICIS_NoInit));
@@ -6121,10 +6095,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul
// };
// First the clang type for struct _message_ref_t
- RecordDecl *RD = RecordDecl::Create(Ctx, TTK_Struct,
- Ctx.getTranslationUnitDecl(),
- SourceLocation(), SourceLocation(),
- &Ctx.Idents.get("_message_ref_t"));
+ RecordDecl *RD = RecordDecl::Create(
+ Ctx, TagTypeKind::Struct, Ctx.getTranslationUnitDecl(), SourceLocation(),
+ SourceLocation(), &Ctx.Idents.get("_message_ref_t"));
RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(),
nullptr, Ctx.VoidPtrTy, nullptr, nullptr, false,
ICIS_NoInit));
@@ -6179,8 +6152,8 @@ void CGObjCNonFragileABIMac::AddModuleClassList(
SmallVector<llvm::Constant*, 8> Symbols(NumClasses);
for (unsigned i=0; i<NumClasses; i++)
- Symbols[i] = llvm::ConstantExpr::getBitCast(Container[i],
- ObjCTypes.Int8PtrTy);
+ Symbols[i] = Container[i];
+
llvm::Constant *Init =
llvm::ConstantArray::get(llvm::ArrayType::get(ObjCTypes.Int8PtrTy,
Symbols.size()),
@@ -6616,9 +6589,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF,
// of protocol's meta-data (not a reference to it!)
assert(!PD->isNonRuntimeProtocol() &&
"attempting to get a protocol ref to a static protocol.");
- llvm::Constant *Init =
- llvm::ConstantExpr::getBitCast(GetOrEmitProtocol(PD),
- ObjCTypes.getExternalProtocolPtrTy());
+ llvm::Constant *Init = GetOrEmitProtocol(PD);
std::string ProtocolName("_OBJC_PROTOCOL_REFERENCE_$_");
ProtocolName += PD->getObjCRuntimeNameAsString();
@@ -6759,8 +6730,7 @@ void CGObjCNonFragileABIMac::emitMethodConstant(ConstantArrayBuilder &builder,
const ObjCMethodDecl *MD,
bool forProtocol) {
auto method = builder.beginStruct(ObjCTypes.MethodTy);
- method.addBitCast(GetMethodVarName(MD->getSelector()),
- ObjCTypes.SelectorPtrTy);
+ method.add(GetMethodVarName(MD->getSelector()));
method.add(GetMethodVarType(MD));
if (forProtocol) {
@@ -6769,7 +6739,7 @@ void CGObjCNonFragileABIMac::emitMethodConstant(ConstantArrayBuilder &builder,
} else {
llvm::Function *fn = GetMethodDefinition(MD);
assert(fn && "no definition for method?");
- method.addBitCast(fn, ObjCTypes.Int8PtrProgramASTy);
+ method.add(fn);
}
method.finishAndAddTo(builder);
@@ -6843,7 +6813,7 @@ CGObjCNonFragileABIMac::emitMethodList(Twine name, MethodListType kind,
llvm::GlobalVariable *GV = finishAndCreateGlobal(values, prefix + name, CGM);
CGM.addCompilerUsedGlobal(GV);
- return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.MethodListnfABIPtrTy);
+ return GV;
}
/// ObjCIvarOffsetVariable - Returns the ivar offset variable for
@@ -6985,7 +6955,7 @@ llvm::Constant *CGObjCNonFragileABIMac::EmitIvarList(
llvm::GlobalVariable *GV = finishAndCreateGlobal(
ivarList, Prefix + OID->getObjCRuntimeNameAsString(), CGM);
CGM.addCompilerUsedGlobal(GV);
- return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.IvarListnfABIPtrTy);
+ return GV;
}
llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef(
@@ -7159,7 +7129,7 @@ CGObjCNonFragileABIMac::EmitProtocolList(Twine Name,
llvm::GlobalVariable *GV =
CGM.getModule().getGlobalVariable(TmpName.str(), true);
if (GV)
- return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.ProtocolListnfABIPtrTy);
+ return GV;
ConstantInitBuilder builder(CGM);
auto values = builder.beginStruct();
@@ -7177,8 +7147,7 @@ CGObjCNonFragileABIMac::EmitProtocolList(Twine Name,
GV = finishAndCreateGlobal(values, Name, CGM);
CGM.addCompilerUsedGlobal(GV);
- return llvm::ConstantExpr::getBitCast(GV,
- ObjCTypes.ProtocolListnfABIPtrTy);
+ return GV;
}
/// EmitObjCValueForIvar - Code Gen for nonfragile ivar reference.
@@ -7412,8 +7381,7 @@ CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name,
NewGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
if (GV) {
- GV->replaceAllUsesWith(
- llvm::ConstantExpr::getBitCast(NewGV, GV->getType()));
+ GV->replaceAllUsesWith(NewGV);
GV->eraseFromParent();
}
GV = NewGV;
@@ -7624,14 +7592,11 @@ Address CGObjCNonFragileABIMac::EmitSelectorAddr(Selector Sel) {
llvm::GlobalVariable *&Entry = SelectorReferences[Sel];
CharUnits Align = CGM.getPointerAlign();
if (!Entry) {
- llvm::Constant *Casted =
- llvm::ConstantExpr::getBitCast(GetMethodVarName(Sel),
- ObjCTypes.SelectorPtrTy);
std::string SectionName =
GetSectionName("__objc_selrefs", "literal_pointers,no_dead_strip");
Entry = new llvm::GlobalVariable(
CGM.getModule(), ObjCTypes.SelectorPtrTy, false,
- getLinkageTypeForObjCMetadata(CGM, SectionName), Casted,
+ getLinkageTypeForObjCMetadata(CGM, SectionName), GetMethodVarName(Sel),
"OBJC_SELECTOR_REFERENCES_");
Entry->setExternallyInitialized(true);
Entry->setSection(SectionName);
diff --git a/clang/lib/CodeGen/CGObjCRuntime.cpp b/clang/lib/CodeGen/CGObjCRuntime.cpp
index 634a3d5a938d..424564f97599 100644
--- a/clang/lib/CodeGen/CGObjCRuntime.cpp
+++ b/clang/lib/CodeGen/CGObjCRuntime.cpp
@@ -63,12 +63,10 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF,
CGF.CGM.getContext().getObjCObjectPointerType(InterfaceTy);
QualType IvarTy =
Ivar->getUsageType(ObjectPtrTy).withCVRQualifiers(CVRQualifiers);
- llvm::Type *LTy = CGF.CGM.getTypes().ConvertTypeForMem(IvarTy);
- llvm::Value *V = CGF.Builder.CreateBitCast(BaseValue, CGF.Int8PtrTy);
+ llvm::Value *V = BaseValue;
V = CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, V, Offset, "add.ptr");
if (!Ivar->isBitField()) {
- V = CGF.Builder.CreateBitCast(V, llvm::PointerType::getUnqual(LTy));
LValue LV = CGF.MakeNaturalAlignAddrLValue(V, IvarTy);
return LV;
}
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index dc2330a29976..115b618056a4 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -37,44 +37,16 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) {
if (llvm::Type *TransTy = CGM.getTargetCodeGenInfo().getOpenCLType(CGM, T))
return TransTy;
- switch (cast<BuiltinType>(T)->getKind()) {
- default:
- llvm_unreachable("Unexpected opencl builtin type!");
- return nullptr;
-#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
- case BuiltinType::Id: \
- return getPointerType(T, "opencl." #ImgType "_" #Suffix "_t");
-#include "clang/Basic/OpenCLImageTypes.def"
- case BuiltinType::OCLSampler:
+ if (T->isSamplerT())
return getSamplerType(T);
- case BuiltinType::OCLEvent:
- return getPointerType(T, "opencl.event_t");
- case BuiltinType::OCLClkEvent:
- return getPointerType(T, "opencl.clk_event_t");
- case BuiltinType::OCLQueue:
- return getPointerType(T, "opencl.queue_t");
- case BuiltinType::OCLReserveID:
- return getPointerType(T, "opencl.reserve_id_t");
-#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
- case BuiltinType::Id: \
- return getPointerType(T, "opencl." #ExtType);
-#include "clang/Basic/OpenCLExtensionTypes.def"
- }
-}
-llvm::PointerType *CGOpenCLRuntime::getPointerType(const Type *T,
- StringRef Name) {
- auto I = CachedTys.find(Name);
- if (I != CachedTys.end())
- return I->second;
+ return getPointerType(T);
+}
- llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+llvm::PointerType *CGOpenCLRuntime::getPointerType(const Type *T) {
uint32_t AddrSpc = CGM.getContext().getTargetAddressSpace(
CGM.getContext().getOpenCLTypeAddrSpace(T));
- auto *PTy =
- llvm::PointerType::get(llvm::StructType::create(Ctx, Name), AddrSpc);
- CachedTys[Name] = PTy;
- return PTy;
+ return llvm::PointerType::get(CGM.getLLVMContext(), AddrSpc);
}
llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T) {
@@ -90,10 +62,7 @@ llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T) {
llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T, StringRef Name,
llvm::Type *&PipeTy) {
if (!PipeTy)
- PipeTy = llvm::PointerType::get(llvm::StructType::create(
- CGM.getLLVMContext(), Name),
- CGM.getContext().getTargetAddressSpace(
- CGM.getContext().getOpenCLTypeAddrSpace(T)));
+ PipeTy = getPointerType(T);
return PipeTy;
}
@@ -105,10 +74,7 @@ llvm::Type *CGOpenCLRuntime::getSamplerType(const Type *T) {
CGM, CGM.getContext().OCLSamplerTy.getTypePtr()))
SamplerTy = TransTy;
else
- SamplerTy = llvm::PointerType::get(
- llvm::StructType::create(CGM.getLLVMContext(), "opencl.sampler_t"),
- CGM.getContext().getTargetAddressSpace(
- CGM.getContext().getOpenCLTypeAddrSpace(T)));
+ SamplerTy = getPointerType(T);
return SamplerTy;
}
@@ -134,7 +100,7 @@ llvm::Value *CGOpenCLRuntime::getPipeElemAlign(const Expr *PipeArg) {
llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
assert(CGM.getLangOpts().OpenCL);
- return llvm::IntegerType::getInt8PtrTy(
+ return llvm::PointerType::get(
CGM.getLLVMContext(),
CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
}
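
Net effect of the CGOpenCLRuntime changes above: OpenCL images, events, queues, pipes, and samplers no longer lower to pointers to named opaque structs such as %opencl.image2d_ro_t*, but to a bare pointer in the type's target address space, which is why the per-name type cache can go away. A sketch of the new lowering (the address-space value is target-dependent):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Only the address space survives in the IR type now; e.g. a global-AS
// image becomes a plain 'ptr addrspace(1)'.
PointerType *lowerOpenCLType(LLVMContext &Ctx, unsigned TargetAS) {
  return PointerType::get(Ctx, TargetAS);
}
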
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.h b/clang/lib/CodeGen/CGOpenCLRuntime.h
index df8084d6008b..34613c3516f3 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.h
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.h
@@ -39,7 +39,6 @@ protected:
llvm::Type *PipeROTy;
llvm::Type *PipeWOTy;
llvm::Type *SamplerTy;
- llvm::StringMap<llvm::PointerType *> CachedTys;
/// Structure for enqueued block information.
struct EnqueuedBlockInfo {
@@ -53,7 +52,7 @@ protected:
virtual llvm::Type *getPipeType(const PipeType *T, StringRef Name,
llvm::Type *&PipeTy);
- llvm::PointerType *getPointerType(const Type *T, StringRef Name);
+ llvm::PointerType *getPointerType(const Type *T);
public:
CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM),
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index a52ec8909b12..7f7e6f530666 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -41,6 +41,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
+#include <cstdint>
#include <numeric>
#include <optional>
@@ -479,27 +480,6 @@ enum OpenMPLocationFlags : unsigned {
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
-namespace {
-LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
-/// Values for bit flags for marking which requires clauses have been used.
-enum OpenMPOffloadingRequiresDirFlags : int64_t {
- /// flag undefined.
- OMP_REQ_UNDEFINED = 0x000,
- /// no requires clause present.
- OMP_REQ_NONE = 0x001,
- /// reverse_offload clause.
- OMP_REQ_REVERSE_OFFLOAD = 0x002,
- /// unified_address clause.
- OMP_REQ_UNIFIED_ADDRESS = 0x004,
- /// unified_shared_memory clause.
- OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
- /// dynamic_allocators clause.
- OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
- LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
-};
-
-} // anonymous namespace
-
/// Describes the ident structure that encodes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
@@ -1054,12 +1034,15 @@ static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
: CGM(CGM), OMPBuilder(CGM.getModule()) {
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
- llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsTargetDevice,
- isGPU(), hasRequiresUnifiedSharedMemory(),
- CGM.getLangOpts().OpenMPOffloadMandatory);
- OMPBuilder.initialize(CGM.getLangOpts().OpenMPIsTargetDevice
- ? CGM.getLangOpts().OMPHostIRFile
- : StringRef{});
+ llvm::OpenMPIRBuilderConfig Config(
+ CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
+ CGM.getLangOpts().OpenMPOffloadMandatory,
+ /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
+ hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
+ OMPBuilder.initialize();
+ OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
+ ? CGM.getLangOpts().OMPHostIRFile
+ : StringRef{});
OMPBuilder.setConfig(Config);
}
@@ -1091,9 +1074,9 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
QualType PtrTy = C.getPointerType(Ty).withRestrict();
FunctionArgList Args;
ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
- /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
+ /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
- /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
+ /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
Args.push_back(&OmpOutParm);
Args.push_back(&OmpInParm);
const CGFunctionInfo &FnInfo =
@@ -1150,7 +1133,7 @@ void CGOpenMPRuntime::emitUserDefinedReduction(
if (const Expr *Init = D->getInitializer()) {
Initializer = emitCombinerOrInitializer(
CGM, D->getType(),
- D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
+ D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
: nullptr,
cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
@@ -1458,6 +1441,7 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
setLocThreadIdInsertPt(CGF);
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
+ auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
llvm::CallInst *Call = CGF.Builder.CreateCall(
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
OMPRTL___kmpc_global_thread_num),
@@ -1502,103 +1486,6 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
-llvm::FunctionCallee
-CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
- bool IsGPUDistribute) {
- assert((IVSize == 32 || IVSize == 64) &&
- "IV size is not compatible with the omp runtime");
- StringRef Name;
- if (IsGPUDistribute)
- Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
- : "__kmpc_distribute_static_init_4u")
- : (IVSigned ? "__kmpc_distribute_static_init_8"
- : "__kmpc_distribute_static_init_8u");
- else
- Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
- : "__kmpc_for_static_init_4u")
- : (IVSigned ? "__kmpc_for_static_init_8"
- : "__kmpc_for_static_init_8u");
-
- llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
- auto *PtrTy = llvm::PointerType::getUnqual(ITy);
- llvm::Type *TypeParams[] = {
- getIdentTyPointerTy(), // loc
- CGM.Int32Ty, // tid
- CGM.Int32Ty, // schedtype
- llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
- PtrTy, // p_lower
- PtrTy, // p_upper
- PtrTy, // p_stride
- ITy, // incr
- ITy // chunk
- };
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- return CGM.CreateRuntimeFunction(FnTy, Name);
-}
-
-llvm::FunctionCallee
-CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
- assert((IVSize == 32 || IVSize == 64) &&
- "IV size is not compatible with the omp runtime");
- StringRef Name =
- IVSize == 32
- ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
- : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
- llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
- llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
- CGM.Int32Ty, // tid
- CGM.Int32Ty, // schedtype
- ITy, // lower
- ITy, // upper
- ITy, // stride
- ITy // chunk
- };
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- return CGM.CreateRuntimeFunction(FnTy, Name);
-}
-
-llvm::FunctionCallee
-CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
- assert((IVSize == 32 || IVSize == 64) &&
- "IV size is not compatible with the omp runtime");
- StringRef Name =
- IVSize == 32
- ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
- : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
- llvm::Type *TypeParams[] = {
- getIdentTyPointerTy(), // loc
- CGM.Int32Ty, // tid
- };
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- return CGM.CreateRuntimeFunction(FnTy, Name);
-}
-
-llvm::FunctionCallee
-CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
- assert((IVSize == 32 || IVSize == 64) &&
- "IV size is not compatible with the omp runtime");
- StringRef Name =
- IVSize == 32
- ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
- : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
- llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
- auto *PtrTy = llvm::PointerType::getUnqual(ITy);
- llvm::Type *TypeParams[] = {
- getIdentTyPointerTy(), // loc
- CGM.Int32Ty, // tid
- llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
- PtrTy, // p_lower
- PtrTy, // p_upper
- PtrTy // p_stride
- };
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- return CGM.CreateRuntimeFunction(FnTy, Name);
-}
-
llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
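
The createForStaticInit/createDispatch{Init,Fini,Next} helpers deleted above were not removed outright: they now live on llvm::OpenMPIRBuilder, and the call sites later in this diff go through OMPBuilder instead. A typical call after this change, mirroring the updated emitForNext below (fragment; OMPBuilder, CGF, and Args are the surrounding members and locals):

// IVSize picks the 4- vs 8-byte entry point and IVSigned the 'u' suffix,
// e.g. __kmpc_dispatch_next_4 vs __kmpc_dispatch_next_8u.
llvm::FunctionCallee Next =
    OMPBuilder.createDispatchNextFunction(/*IVSize=*/32, /*IVSigned=*/true);
llvm::Value *MoreWork = CGF.EmitRuntimeCall(Next, Args);
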
@@ -1653,7 +1540,7 @@ static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
llvm::sys::fs::UniqueID ID;
- if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
+ if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
}
@@ -1667,7 +1554,7 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
auto LinkageForVariable = [&VD, this]() {
- return CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
+ return CGM.getLLVMLinkageVarDefinition(VD);
};
std::vector<llvm::GlobalVariable *> GeneratedRefs;
@@ -1761,7 +1648,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
FunctionArgList Args;
ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
/*Id=*/nullptr, CGM.getContext().VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.push_back(&Dst);
const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
@@ -1793,7 +1680,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
FunctionArgList Args;
ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
/*Id=*/nullptr, CGM.getContext().VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.push_back(&Dst);
const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
@@ -1861,134 +1748,39 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
}
-bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
- llvm::GlobalVariable *Addr,
- bool PerformInit) {
- if (CGM.getLangOpts().OMPTargetTriples.empty() &&
- !CGM.getLangOpts().OpenMPIsTargetDevice)
- return false;
- std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
- if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
- ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
- *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
- HasRequiresUnifiedSharedMemory))
- return CGM.getLangOpts().OpenMPIsTargetDevice;
- VD = VD->getDefinition(CGM.getContext());
- assert(VD && "Unknown VarDecl");
-
- if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
- return CGM.getLangOpts().OpenMPIsTargetDevice;
+void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
+ llvm::GlobalValue *GV) {
+ std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
+ OMPDeclareTargetDeclAttr::getActiveAttr(FD);
- QualType ASTTy = VD->getType();
- SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
-
- // Produce the unique prefix to identify the new target regions. We use
- // the source location of the variable declaration which we know to not
- // conflict with any target region.
- llvm::TargetRegionEntryInfo EntryInfo =
- getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName());
- SmallString<128> Buffer, Out;
- OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
-
- const Expr *Init = VD->getAnyInitializer();
- if (CGM.getLangOpts().CPlusPlus && PerformInit) {
- llvm::Constant *Ctor;
- llvm::Constant *ID;
- if (CGM.getLangOpts().OpenMPIsTargetDevice) {
- // Generate function that re-emits the declaration's initializer into
- // the threadprivate copy of the variable VD
- CodeGenFunction CtorCGF(CGM);
+ // We only need to handle active 'indirect' declare target functions.
+ if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
+ return;
- const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
- llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
- llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
- FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
- llvm::GlobalValue::WeakODRLinkage);
- Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
- if (CGM.getTriple().isAMDGCN())
- Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
- auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
- CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
- FunctionArgList(), Loc, Loc);
- auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
- llvm::Constant *AddrInAS0 = Addr;
- if (Addr->getAddressSpace() != 0)
- AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
- Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
- CtorCGF.EmitAnyExprToMem(Init,
- Address(AddrInAS0, Addr->getValueType(),
- CGM.getContext().getDeclAlign(VD)),
- Init->getType().getQualifiers(),
- /*IsInitializer=*/true);
- CtorCGF.FinishFunction();
- Ctor = Fn;
- ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
- } else {
- Ctor = new llvm::GlobalVariable(
- CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::PrivateLinkage,
- llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
- ID = Ctor;
- }
+ // Get a mangled name to store the new device global in.
+ llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
+ CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
+ SmallString<128> Name;
+ OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
- // Register the information for the entry associated with the constructor.
- Out.clear();
- auto CtorEntryInfo = EntryInfo;
- CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
- OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
- CtorEntryInfo, Ctor, ID,
- llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
+ // We need to generate a new global to hold the address of the indirectly
+ // called device function. Doing this allows us to keep the visibility and
+ // linkage of the associated function unchanged while allowing the runtime to
+ // access its value.
+ llvm::GlobalValue *Addr = GV;
+ if (CGM.getLangOpts().OpenMPIsTargetDevice) {
+ Addr = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.VoidPtrTy,
+ /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
+ nullptr, llvm::GlobalValue::NotThreadLocal,
+ CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
+ Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
}
- if (VD->getType().isDestructedType() != QualType::DK_none) {
- llvm::Constant *Dtor;
- llvm::Constant *ID;
- if (CGM.getLangOpts().OpenMPIsTargetDevice) {
- // Generate function that emits destructor call for the threadprivate
- // copy of the variable VD
- CodeGenFunction DtorCGF(CGM);
- const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
- llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
- llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
- FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
- llvm::GlobalValue::WeakODRLinkage);
- Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
- if (CGM.getTriple().isAMDGCN())
- Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
- auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
- DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
- FunctionArgList(), Loc, Loc);
- // Create a scope with an artificial location for the body of this
- // function.
- auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
- llvm::Constant *AddrInAS0 = Addr;
- if (Addr->getAddressSpace() != 0)
- AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
- Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
- DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
- CGM.getContext().getDeclAlign(VD)),
- ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
- DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
- DtorCGF.FinishFunction();
- Dtor = Fn;
- ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
- } else {
- Dtor = new llvm::GlobalVariable(
- CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::PrivateLinkage,
- llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
- ID = Dtor;
- }
- // Register the information for the entry associated with the destructor.
- Out.clear();
- auto DtorEntryInfo = EntryInfo;
- DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
- OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
- DtorEntryInfo, Dtor, ID,
- llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
- }
- return CGM.getLangOpts().OpenMPIsTargetDevice;
+ OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
+ Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
+ llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
+ llvm::GlobalValue::WeakODRLinkage);
}
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
@@ -2161,11 +1953,7 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
std::string Name = getName({Prefix, "var"});
- llvm::GlobalVariable *G = OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
- llvm::Align PtrAlign = OMPBuilder.M.getDataLayout().getPointerABIAlignment(G->getAddressSpace());
- if (PtrAlign > llvm::Align(G->getAlignment()))
- G->setAlignment(PtrAlign);
- return G;
+ return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}
namespace {
@@ -2353,9 +2141,9 @@ static llvm::Value *emitCopyprivateCopyFunction(
// void copy_func(void *LHSArg, void *RHSArg);
FunctionArgList Args;
ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
const auto &CGFI =
@@ -2451,7 +2239,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
if (DidIt.isValid()) {
llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
QualType CopyprivateArrayTy = C.getConstantArrayType(
- C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
+ C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
/*IndexTypeQuals=*/0);
// Create a list of all private variables for copyprivate.
Address CopyprivateList =
@@ -2753,7 +2541,8 @@ void CGOpenMPRuntime::emitForDispatchInit(
CGF.Builder.getIntN(IVSize, 1), // Stride
Chunk // Chunk
};
- CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
+ CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
+ Args);
}
static void emitForStaticInitCall(
@@ -2820,7 +2609,8 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
: OMP_IDENT_WORK_SECTIONS);
llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::FunctionCallee StaticInitFunction =
- createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
+ OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
+ false);
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
@@ -2839,7 +2629,7 @@ void CGOpenMPRuntime::emitDistributeStaticInit(
bool isGPUDistribute =
CGM.getLangOpts().OpenMPIsTargetDevice &&
(CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
- StaticInitFunction = createForStaticInitFunction(
+ StaticInitFunction = OMPBuilder.createForStaticInitFunction(
Values.IVSize, Values.IVSigned, isGPUDistribute);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
@@ -2883,7 +2673,8 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
return;
// Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
- CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
+ CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
+ Args);
}
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
@@ -2903,8 +2694,8 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
UB.getPointer(), // &Upper
ST.getPointer() // &Stride
};
- llvm::Value *Call =
- CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
+ llvm::Value *Call = CGF.EmitRuntimeCall(
+ OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
return CGF.EmitScalarConversion(
Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
CGF.getContext().BoolTy, Loc);
@@ -2993,8 +2784,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
E = CGM.getContext().getSourceManager().fileinfo_end();
I != E; ++I) {
- if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
- I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
+ if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
+ I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
Loc = CGM.getContext().getSourceManager().translateFileLineCol(
I->getFirst(), EntryInfo.Line, 1);
break;
@@ -3121,7 +2912,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
// kmp_int32 liter;
// void * reductions;
// };
- RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
+ RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
UD->startDefinition();
addFieldToRecordDecl(C, UD, KmpInt32Ty);
addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
@@ -3187,10 +2978,10 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
ASTContext &C = CGM.getContext();
FunctionArgList Args;
ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
KmpTaskTWithPrivatesPtrQTy.withRestrict(),
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.push_back(&GtidArg);
Args.push_back(&TaskTypeArg);
const auto &TaskEntryFnInfo =
@@ -3289,10 +3080,10 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
ASTContext &C = CGM.getContext();
FunctionArgList Args;
ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
KmpTaskTWithPrivatesPtrQTy.withRestrict(),
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.push_back(&GtidArg);
Args.push_back(&TaskTypeArg);
const auto &DestructorFnInfo =
@@ -3349,7 +3140,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
ImplicitParamDecl TaskPrivatesArg(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(PrivatesQTy).withConst().withRestrict(),
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.push_back(&TaskPrivatesArg);
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
unsigned Counter = 1;
@@ -3359,7 +3150,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
C.getPointerType(C.getPointerType(E->getType()))
.withConst()
.withRestrict(),
- ImplicitParamDecl::Other));
+ ImplicitParamKind::Other));
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
PrivateVarsPos[VD] = Counter;
++Counter;
@@ -3370,7 +3161,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
C.getPointerType(C.getPointerType(E->getType()))
.withConst()
.withRestrict(),
- ImplicitParamDecl::Other));
+ ImplicitParamKind::Other));
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
PrivateVarsPos[VD] = Counter;
++Counter;
@@ -3381,7 +3172,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
C.getPointerType(C.getPointerType(E->getType()))
.withConst()
.withRestrict(),
- ImplicitParamDecl::Other));
+ ImplicitParamKind::Other));
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
PrivateVarsPos[VD] = Counter;
++Counter;
@@ -3395,7 +3186,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
- ImplicitParamDecl::Other));
+ ImplicitParamKind::Other));
PrivateVarsPos[VD] = Counter;
++Counter;
}
@@ -3599,12 +3390,12 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
FunctionArgList Args;
ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
KmpTaskTWithPrivatesPtrQTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
KmpTaskTWithPrivatesPtrQTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.push_back(&DstArg);
Args.push_back(&SrcArg);
Args.push_back(&LastprivArg);
@@ -4018,12 +3809,12 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
VK_PRValue);
CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
RValue::get(NumOfElements));
- KmpTaskAffinityInfoArrayTy =
- C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
- /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
+ KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
+ KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
+ /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
// Properly emit variable-sized array.
auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
CGF.EmitVarDecl(*PD);
AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
@@ -4032,7 +3823,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
KmpTaskAffinityInfoTy,
llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
- ArrayType::Normal, /*IndexTypeQuals=*/0);
+ ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
AffinitiesArray =
CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
@@ -4477,12 +4268,12 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
RValue::get(NumOfElements));
KmpDependInfoArrayTy =
- C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
+ C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
/*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
// CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
// Properly emit variable-sized array.
auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
CGF.EmitVarDecl(*PD);
DependenciesArray = CGF.GetAddrOfLocalVar(PD);
NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
@@ -4490,7 +4281,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
} else {
KmpDependInfoArrayTy = C.getConstantArrayType(
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
- ArrayType::Normal, /*IndexTypeQuals=*/0);
+ ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
DependenciesArray =
CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
@@ -4570,7 +4361,7 @@ Address CGOpenMPRuntime::emitDepobjDependClause(
} else {
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
- nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
Size = CGM.getSize(Sz.alignTo(Align));
NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
@@ -5012,9 +4803,9 @@ llvm::Function *CGOpenMPRuntime::emitReductionFunction(
// void reduction_func(void *LHSArg, void *RHSArg);
FunctionArgList Args;
ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
const auto &CGFI =
@@ -5186,9 +4977,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
++Size;
}
llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
- QualType ReductionArrayTy =
- C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ QualType ReductionArrayTy = C.getConstantArrayType(
+ C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
+ /*IndexTypeQuals=*/0);
Address ReductionList =
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
const auto *IPriv = Privates.begin();
@@ -5451,9 +5242,9 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
VoidPtrTy.addRestrict();
FunctionArgList Args;
ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.emplace_back(&Param);
Args.emplace_back(&ParamOrig);
const auto &FnInfo =
@@ -5522,9 +5313,9 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
FunctionArgList Args;
ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.emplace_back(&ParamInOut);
Args.emplace_back(&ParamIn);
const auto &FnInfo =
@@ -5594,7 +5385,7 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
ASTContext &C = CGM.getContext();
FunctionArgList Args;
ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Args.emplace_back(&Param);
const auto &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
@@ -5657,8 +5448,9 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
QualType RDType = C.getRecordType(RD);
unsigned Size = Data.ReductionVars.size();
llvm::APInt ArraySize(/*numBits=*/64, Size);
- QualType ArrayRDType = C.getConstantArrayType(
- RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ QualType ArrayRDType =
+ C.getConstantArrayType(RDType, ArraySize, nullptr,
+ ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
// kmp_task_red_input_t .rd_input.[Size];
Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
@@ -6082,6 +5874,42 @@ void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
{ThreadId, AllocatorVal});
}
+void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
+ const OMPExecutableDirective &D, CodeGenFunction &CGF,
+ int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
+ int32_t &MaxTeamsVal) {
+
+ getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
+ getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
+ /*UpperBoundOnly=*/true);
+
+ for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
+ for (auto *A : C->getAttrs()) {
+ int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
+ int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
+ if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
+ CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
+ &AttrMinBlocksVal, &AttrMaxBlocksVal);
+ else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
+ CGM.handleAMDGPUFlatWorkGroupSizeAttr(
+ nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
+ &AttrMaxThreadsVal);
+ else
+ continue;
+
+ MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
+ if (AttrMaxThreadsVal > 0)
+ MaxThreadsVal = MaxThreadsVal > 0
+ ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
+ : AttrMaxThreadsVal;
+ MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
+ if (AttrMaxBlocksVal > 0)
+ MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
+ : AttrMaxBlocksVal;
+ }
+ }
+}
+
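As a concrete illustration (not part of this patch; the bounds shown are
hypothetical), the attributes consumed by this helper arrive via the
'ompx_attribute' extension clause:

    // CUDA spelling: clamps MaxThreadsVal to 128, raises MinTeamsVal to 2.
    #pragma omp target teams ompx_attribute(__attribute__((launch_bounds(128, 2))))
    { /* ... */ }

    // AMDGPU spelling: bounds the per-team thread count to [64, 256].
    #pragma omp target ompx_attribute(__attribute__((amdgpu_flat_work_group_size(64, 256))))
    { /* ... */ }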
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
@@ -6100,18 +5928,20 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
};
- // Get NumTeams and ThreadLimit attributes
- int32_t DefaultValTeams = -1;
- int32_t DefaultValThreads = -1;
- getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
- getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
-
OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
- DefaultValTeams, DefaultValThreads,
IsOffloadEntry, OutlinedFn, OutlinedFnID);
- if (OutlinedFn != nullptr)
- CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
+ if (!OutlinedFn)
+ return;
+
+ CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
+
+ for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
+ for (auto *A : C->getAttrs()) {
+ if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
+ CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
+ }
+ }
}
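The loop added above forwards one further AMDGPU launch attribute onto the
outlined kernel; a minimal sketch of its use (hypothetical bounds):

    // Propagated onto OutlinedFn via handleAMDGPUWavesPerEUAttr.
    #pragma omp target ompx_attribute(__attribute__((amdgpu_waves_per_eu(1, 4))))
    { /* ... */ }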
/// Checks if the expression is constant or does not have non-trivial function
@@ -6167,8 +5997,8 @@ const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
}
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
- CodeGenFunction &CGF, const OMPExecutableDirective &D,
- int32_t &DefaultVal) {
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
+ int32_t &MaxTeamsVal) {
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
@@ -6189,22 +6019,22 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
if (auto Constant =
NumTeams->getIntegerConstantExpr(CGF.getContext()))
- DefaultVal = Constant->getExtValue();
+ MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
return NumTeams;
}
- DefaultVal = 0;
+ MinTeamsVal = MaxTeamsVal = 0;
return nullptr;
}
if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
- DefaultVal = 1;
+ MinTeamsVal = MaxTeamsVal = 1;
return nullptr;
}
- DefaultVal = 1;
+ MinTeamsVal = MaxTeamsVal = 1;
return nullptr;
}
  // A value of -1 signals that no teams region needs to be emitted
- DefaultVal = -1;
+ MinTeamsVal = MaxTeamsVal = -1;
return nullptr;
}
case OMPD_target_teams_loop:
@@ -6218,10 +6048,10 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
- DefaultVal = Constant->getExtValue();
+ MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
return NumTeams;
}
- DefaultVal = 0;
+ MinTeamsVal = MaxTeamsVal = 0;
return nullptr;
}
case OMPD_target_parallel:
@@ -6229,7 +6059,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
case OMPD_target_parallel_for_simd:
case OMPD_target_parallel_loop:
case OMPD_target_simd:
- DefaultVal = 1;
+ MinTeamsVal = MaxTeamsVal = 1;
return nullptr;
case OMPD_parallel:
case OMPD_for:
@@ -6304,8 +6134,9 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
"Clauses associated with the teams directive expected to be emitted "
"only for the host!");
CGBuilderTy &Bld = CGF.Builder;
- int32_t DefaultNT = -1;
- const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
+ int32_t MinNT = -1, MaxNT = -1;
+ const Expr *NumTeams =
+ getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
if (NumTeams != nullptr) {
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
@@ -6335,239 +6166,142 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
}
}
- return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
+  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
+ return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}
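Sketch of the simplest case hitting the new constant fallback (hypothetical
directive):

    #pragma omp target teams        // no num_teams clause
    { /* ... */ }

Here MinTeamsVal == MaxTeamsVal == 0, so the assert holds and an i32 0 is
emitted, deferring the team count to the runtime.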
-static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
- llvm::Value *DefaultThreadLimitVal) {
+/// Check for a constant num_threads value (stored in \p UpperBound), or an
+/// expression (stored in \p E). If the value is conditional (via an if-clause),
+/// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
+/// corresponding expression evaluation is not performed.
+static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
+ const Expr **E, int32_t &UpperBound,
+ bool UpperBoundOnly, llvm::Value **CondVal) {
const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
CGF.getContext(), CS->getCapturedStmt());
- if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
- if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
- llvm::Value *NumThreads = nullptr;
- llvm::Value *CondVal = nullptr;
- // Handle if clause. If if clause present, the number of threads is
- // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
- if (Dir->hasClausesOfKind<OMPIfClause>()) {
- CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
- CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- const OMPIfClause *IfClause = nullptr;
- for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
- if (C->getNameModifier() == OMPD_unknown ||
- C->getNameModifier() == OMPD_parallel) {
- IfClause = C;
- break;
- }
+ const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
+ if (!Dir)
+ return;
+
+ if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
+  // Handle the if clause. If an if clause is present, the number of threads
+  // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
+ if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
+ CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+ const OMPIfClause *IfClause = nullptr;
+ for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
+ if (C->getNameModifier() == OMPD_unknown ||
+ C->getNameModifier() == OMPD_parallel) {
+ IfClause = C;
+ break;
}
- if (IfClause) {
- const Expr *Cond = IfClause->getCondition();
- bool Result;
- if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
- if (!Result)
- return CGF.Builder.getInt32(1);
- } else {
- CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
- if (const auto *PreInit =
- cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
- for (const auto *I : PreInit->decls()) {
- if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
- CGF.EmitVarDecl(cast<VarDecl>(*I));
- } else {
- CodeGenFunction::AutoVarEmission Emission =
- CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
- CGF.EmitAutoVarCleanups(Emission);
- }
+ }
+ if (IfClause) {
+ const Expr *CondExpr = IfClause->getCondition();
+ bool Result;
+ if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
+ if (!Result) {
+ UpperBound = 1;
+ return;
+ }
+ } else {
+ CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
+ if (const auto *PreInit =
+ cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
+ for (const auto *I : PreInit->decls()) {
+ if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
+ CGF.EmitVarDecl(cast<VarDecl>(*I));
+ } else {
+ CodeGenFunction::AutoVarEmission Emission =
+ CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
+ CGF.EmitAutoVarCleanups(Emission);
}
}
- CondVal = CGF.EvaluateExprAsBool(Cond);
+ *CondVal = CGF.EvaluateExprAsBool(CondExpr);
}
}
}
- // Check the value of num_threads clause iff if clause was not specified
- // or is not evaluated to false.
- if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
- CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
- CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- const auto *NumThreadsClause =
- Dir->getSingleClause<OMPNumThreadsClause>();
- CodeGenFunction::LexicalScope Scope(
- CGF, NumThreadsClause->getNumThreads()->getSourceRange());
- if (const auto *PreInit =
- cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
- for (const auto *I : PreInit->decls()) {
- if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
- CGF.EmitVarDecl(cast<VarDecl>(*I));
- } else {
- CodeGenFunction::AutoVarEmission Emission =
- CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
- CGF.EmitAutoVarCleanups(Emission);
- }
+ }
+  // Check the value of the num_threads clause iff the if clause was not
+  // specified or does not evaluate to false.
+ if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
+ CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+ const auto *NumThreadsClause =
+ Dir->getSingleClause<OMPNumThreadsClause>();
+ const Expr *NTExpr = NumThreadsClause->getNumThreads();
+ if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
+ if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
+ UpperBound =
+ UpperBound
+ ? Constant->getZExtValue()
+ : std::min(UpperBound,
+ static_cast<int32_t>(Constant->getZExtValue()));
+    // If we haven't found an upper bound, remember we saw a thread-limiting
+    // clause.
+ if (UpperBound == -1)
+ UpperBound = 0;
+ if (!E)
+ return;
+ CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
+ if (const auto *PreInit =
+ cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
+ for (const auto *I : PreInit->decls()) {
+ if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
+ CGF.EmitVarDecl(cast<VarDecl>(*I));
+ } else {
+ CodeGenFunction::AutoVarEmission Emission =
+ CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
+ CGF.EmitAutoVarCleanups(Emission);
}
}
- NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
- NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
- /*isSigned=*/false);
- if (DefaultThreadLimitVal)
- NumThreads = CGF.Builder.CreateSelect(
- CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
- DefaultThreadLimitVal, NumThreads);
- } else {
- NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
- : CGF.Builder.getInt32(0);
- }
- // Process condition of the if clause.
- if (CondVal) {
- NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
- CGF.Builder.getInt32(1));
}
- return NumThreads;
+ *E = NTExpr;
}
- if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
- return CGF.Builder.getInt32(1);
+ return;
}
- return DefaultThreadLimitVal;
+ if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
+ UpperBound = 1;
}
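For reference, a directive exercising both paths of the rewritten helper
(names and values are hypothetical):

    #pragma omp target
    #pragma omp parallel if(use_par) num_threads(8)
    { /* ... */ }

The constant 8 lands in UpperBound, while the non-constant if condition is
stored through CondVal so the caller can emit the <cond> ? ... : 1 select.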
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
- CodeGenFunction &CGF, const OMPExecutableDirective &D,
- int32_t &DefaultVal) {
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
+ bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
+ assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
+ "Clauses associated with the teams directive expected to be emitted "
+ "only for the host!");
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
"Expected target-based executable directive.");
- switch (DirectiveKind) {
- case OMPD_target:
- // Teams have no clause thread_limit
- return nullptr;
- case OMPD_target_teams:
- case OMPD_target_teams_distribute:
- if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
- const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
- const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
- if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
- if (auto Constant =
- ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
- DefaultVal = Constant->getExtValue();
- return ThreadLimit;
- }
- return nullptr;
- case OMPD_target_teams_loop:
- case OMPD_target_parallel_loop:
- case OMPD_target_parallel:
- case OMPD_target_parallel_for:
- case OMPD_target_parallel_for_simd:
- case OMPD_target_teams_distribute_parallel_for:
- case OMPD_target_teams_distribute_parallel_for_simd: {
- Expr *ThreadLimit = nullptr;
- Expr *NumThreads = nullptr;
- if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
- const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
- ThreadLimit = ThreadLimitClause->getThreadLimit();
- if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
- if (auto Constant =
- ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
- DefaultVal = Constant->getExtValue();
- }
- if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
- const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
- NumThreads = NumThreadsClause->getNumThreads();
- if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
- if (auto Constant =
- NumThreads->getIntegerConstantExpr(CGF.getContext())) {
- if (Constant->getExtValue() < DefaultVal) {
- DefaultVal = Constant->getExtValue();
- ThreadLimit = NumThreads;
- }
- }
- }
+ const Expr *NT = nullptr;
+ const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
+
+ auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
+ if (E->isIntegerConstantExpr(CGF.getContext())) {
+ if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
+ UpperBound = UpperBound ? Constant->getZExtValue()
+ : std::min(UpperBound,
+ int32_t(Constant->getZExtValue()));
}
- return ThreadLimit;
- }
- case OMPD_target_teams_distribute_simd:
- case OMPD_target_simd:
- DefaultVal = 1;
- return nullptr;
- case OMPD_parallel:
- case OMPD_for:
- case OMPD_parallel_for:
- case OMPD_parallel_master:
- case OMPD_parallel_sections:
- case OMPD_for_simd:
- case OMPD_parallel_for_simd:
- case OMPD_cancel:
- case OMPD_cancellation_point:
- case OMPD_ordered:
- case OMPD_threadprivate:
- case OMPD_allocate:
- case OMPD_task:
- case OMPD_simd:
- case OMPD_tile:
- case OMPD_unroll:
- case OMPD_sections:
- case OMPD_section:
- case OMPD_single:
- case OMPD_master:
- case OMPD_critical:
- case OMPD_taskyield:
- case OMPD_barrier:
- case OMPD_taskwait:
- case OMPD_taskgroup:
- case OMPD_atomic:
- case OMPD_flush:
- case OMPD_depobj:
- case OMPD_scan:
- case OMPD_teams:
- case OMPD_target_data:
- case OMPD_target_exit_data:
- case OMPD_target_enter_data:
- case OMPD_distribute:
- case OMPD_distribute_simd:
- case OMPD_distribute_parallel_for:
- case OMPD_distribute_parallel_for_simd:
- case OMPD_teams_distribute:
- case OMPD_teams_distribute_simd:
- case OMPD_teams_distribute_parallel_for:
- case OMPD_teams_distribute_parallel_for_simd:
- case OMPD_target_update:
- case OMPD_declare_simd:
- case OMPD_declare_variant:
- case OMPD_begin_declare_variant:
- case OMPD_end_declare_variant:
- case OMPD_declare_target:
- case OMPD_end_declare_target:
- case OMPD_declare_reduction:
- case OMPD_declare_mapper:
- case OMPD_taskloop:
- case OMPD_taskloop_simd:
- case OMPD_master_taskloop:
- case OMPD_master_taskloop_simd:
- case OMPD_parallel_master_taskloop:
- case OMPD_parallel_master_taskloop_simd:
- case OMPD_requires:
- case OMPD_unknown:
- break;
- default:
- break;
- }
- llvm_unreachable("Unsupported directive kind.");
-}
+    // If we haven't found an upper bound, remember we saw a thread-limiting
+    // clause.
+ if (UpperBound == -1)
+ UpperBound = 0;
+ if (EPtr)
+ *EPtr = E;
+ };
+
+ auto ReturnSequential = [&]() {
+ UpperBound = 1;
+ return NT;
+ };
-llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
- CodeGenFunction &CGF, const OMPExecutableDirective &D) {
- assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
- "Clauses associated with the teams directive expected to be emitted "
- "only for the host!");
- OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
- assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
- "Expected target-based executable directive.");
- CGBuilderTy &Bld = CGF.Builder;
- llvm::Value *ThreadLimitVal = nullptr;
- llvm::Value *NumThreadsVal = nullptr;
switch (DirectiveKind) {
case OMPD_target: {
const CapturedStmt *CS = D.getInnermostCapturedStmt();
- if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
- return NumThreads;
+ getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
CGF.getContext(), CS->getCapturedStmt());
// TODO: The standard is not clear how to resolve two thread limit clauses,
@@ -6576,30 +6310,28 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
ThreadLimitClause = TLC;
- CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
- CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- CodeGenFunction::LexicalScope Scope(
- CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
- if (const auto *PreInit =
- cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
- for (const auto *I : PreInit->decls()) {
- if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
- CGF.EmitVarDecl(cast<VarDecl>(*I));
- } else {
- CodeGenFunction::AutoVarEmission Emission =
- CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
- CGF.EmitAutoVarCleanups(Emission);
+ if (ThreadLimitExpr) {
+ CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+ CodeGenFunction::LexicalScope Scope(
+ CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
+ if (const auto *PreInit =
+ cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
+ for (const auto *I : PreInit->decls()) {
+ if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
+ CGF.EmitVarDecl(cast<VarDecl>(*I));
+ } else {
+ CodeGenFunction::AutoVarEmission Emission =
+ CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
+ CGF.EmitAutoVarCleanups(Emission);
+ }
}
}
}
}
}
- if (ThreadLimitClause) {
- llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
- ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
- ThreadLimitVal =
- Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
- }
+ if (ThreadLimitClause)
+ CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
!isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
@@ -6608,53 +6340,41 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
CGF.getContext(), CS->getCapturedStmt());
Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
}
- if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
- !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
+ if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
CS = Dir->getInnermostCapturedStmt();
- if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
- return NumThreads;
- }
- if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
- return Bld.getInt32(1);
+ getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
+ } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
+ return ReturnSequential();
}
- return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
+ return NT;
}
case OMPD_target_teams: {
if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
- llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
- ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
- ThreadLimitVal =
- Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
+ CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
}
const CapturedStmt *CS = D.getInnermostCapturedStmt();
- if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
- return NumThreads;
+ getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
CGF.getContext(), CS->getCapturedStmt());
if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
if (Dir->getDirectiveKind() == OMPD_distribute) {
CS = Dir->getInnermostCapturedStmt();
- if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
- return NumThreads;
+ getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
}
}
- return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
+ return NT;
}
case OMPD_target_teams_distribute:
if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
- llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
- ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
- ThreadLimitVal =
- Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
+ CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
}
- if (llvm::Value *NumThreads =
- getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
- return NumThreads;
- return Bld.getInt32(0);
+ getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
+ UpperBoundOnly, CondVal);
+ return NT;
case OMPD_target_teams_loop:
case OMPD_target_parallel_loop:
case OMPD_target_parallel:
@@ -6662,10 +6382,7 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
case OMPD_target_parallel_for_simd:
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd: {
- llvm::Value *CondVal = nullptr;
- // Handle if clause. If if clause present, the number of threads is
- // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
- if (D.hasClausesOfKind<OMPIfClause>()) {
+ if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
const OMPIfClause *IfClause = nullptr;
for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
if (C->getNameModifier() == OMPD_unknown ||
@@ -6679,109 +6396,92 @@ llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
bool Result;
if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
if (!Result)
- return Bld.getInt32(1);
+ return ReturnSequential();
} else {
CodeGenFunction::RunCleanupsScope Scope(CGF);
- CondVal = CGF.EvaluateExprAsBool(Cond);
+ *CondVal = CGF.EvaluateExprAsBool(Cond);
}
}
}
if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
- llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
- ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
- ThreadLimitVal =
- Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
+ CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
}
if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
- llvm::Value *NumThreads = CGF.EmitScalarExpr(
- NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
- NumThreadsVal =
- Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
- ThreadLimitVal = ThreadLimitVal
- ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
- ThreadLimitVal),
- NumThreadsVal, ThreadLimitVal)
- : NumThreadsVal;
+ CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
+ return NumThreadsClause->getNumThreads();
}
- if (!ThreadLimitVal)
- ThreadLimitVal = Bld.getInt32(0);
- if (CondVal)
- return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
- return ThreadLimitVal;
+ return NT;
}
case OMPD_target_teams_distribute_simd:
case OMPD_target_simd:
- return Bld.getInt32(1);
- case OMPD_parallel:
- case OMPD_for:
- case OMPD_parallel_for:
- case OMPD_parallel_master:
- case OMPD_parallel_sections:
- case OMPD_for_simd:
- case OMPD_parallel_for_simd:
- case OMPD_cancel:
- case OMPD_cancellation_point:
- case OMPD_ordered:
- case OMPD_threadprivate:
- case OMPD_allocate:
- case OMPD_task:
- case OMPD_simd:
- case OMPD_tile:
- case OMPD_unroll:
- case OMPD_sections:
- case OMPD_section:
- case OMPD_single:
- case OMPD_master:
- case OMPD_critical:
- case OMPD_taskyield:
- case OMPD_barrier:
- case OMPD_taskwait:
- case OMPD_taskgroup:
- case OMPD_atomic:
- case OMPD_flush:
- case OMPD_depobj:
- case OMPD_scan:
- case OMPD_teams:
- case OMPD_target_data:
- case OMPD_target_exit_data:
- case OMPD_target_enter_data:
- case OMPD_distribute:
- case OMPD_distribute_simd:
- case OMPD_distribute_parallel_for:
- case OMPD_distribute_parallel_for_simd:
- case OMPD_teams_distribute:
- case OMPD_teams_distribute_simd:
- case OMPD_teams_distribute_parallel_for:
- case OMPD_teams_distribute_parallel_for_simd:
- case OMPD_target_update:
- case OMPD_declare_simd:
- case OMPD_declare_variant:
- case OMPD_begin_declare_variant:
- case OMPD_end_declare_variant:
- case OMPD_declare_target:
- case OMPD_end_declare_target:
- case OMPD_declare_reduction:
- case OMPD_declare_mapper:
- case OMPD_taskloop:
- case OMPD_taskloop_simd:
- case OMPD_master_taskloop:
- case OMPD_master_taskloop_simd:
- case OMPD_parallel_master_taskloop:
- case OMPD_parallel_master_taskloop_simd:
- case OMPD_requires:
- case OMPD_metadirective:
- case OMPD_unknown:
- break;
+ return ReturnSequential();
default:
break;
}
llvm_unreachable("Unsupported directive kind.");
}
+llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D) {
+ llvm::Value *NumThreadsVal = nullptr;
+ llvm::Value *CondVal = nullptr;
+ llvm::Value *ThreadLimitVal = nullptr;
+ const Expr *ThreadLimitExpr = nullptr;
+ int32_t UpperBound = -1;
+
+ const Expr *NT = getNumThreadsExprForTargetDirective(
+ CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
+ &ThreadLimitExpr);
+
+  // The thread limit expression is used below; emit it.
+ if (ThreadLimitExpr) {
+ ThreadLimitVal =
+ CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
+ ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
+ /*isSigned=*/false);
+ }
+
+  // Generate the num threads expression.
+ if (UpperBound == 1) {
+ NumThreadsVal = CGF.Builder.getInt32(UpperBound);
+ } else if (NT) {
+ NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
+ NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
+ /*isSigned=*/false);
+ } else if (ThreadLimitVal) {
+    // If we do not have a num threads value but a thread limit, replace the
+    // former with the latter. The thread limit expression was already handled.
+ NumThreadsVal = ThreadLimitVal;
+ ThreadLimitVal = nullptr;
+ } else {
+ // Default to "0" which means runtime choice.
+ assert(!ThreadLimitVal && "Default not applicable with thread limit value");
+ NumThreadsVal = CGF.Builder.getInt32(0);
+ }
+
+ // Handle if clause. If if clause present, the number of threads is
+ // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
+ if (CondVal) {
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
+ CGF.Builder.getInt32(1));
+ }
+
+  // If both the thread limit and the num threads expression were present,
+  // take the minimum.
+ if (ThreadLimitVal) {
+ NumThreadsVal = CGF.Builder.CreateSelect(
+ CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
+ ThreadLimitVal, NumThreadsVal);
+ }
+
+ return NumThreadsVal;
+}
+
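Putting the pieces together for a directive such as (hypothetical)

    #pragma omp target parallel if(c) num_threads(n) thread_limit(t)

the function above emits, schematically:

    %nt  = n as i32                          ; EmitScalarExpr + IntCast
    %sel = select i1 %c, i32 %nt, i32 1      ; if clause: <cond> ? nt : 1
    %cmp = icmp ult i32 %tl, %sel
    %res = select i1 %cmp, i32 %tl, i32 %sel ; clamp by thread_limit

and it falls back to a literal i32 0 when no clause constrains the thread
count, leaving the decision to the runtime.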
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
@@ -7689,7 +7389,14 @@ private:
} else if (FieldIndex < PartialStruct.LowestElem.first) {
PartialStruct.LowestElem = {FieldIndex, LowestElem};
} else if (FieldIndex > PartialStruct.HighestElem.first) {
- PartialStruct.HighestElem = {FieldIndex, LowestElem};
+ if (IsFinalArraySection) {
+ Address HB =
+ CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
+ .getAddress(CGF);
+ PartialStruct.HighestElem = {FieldIndex, HB};
+ } else {
+ PartialStruct.HighestElem = {FieldIndex, LowestElem};
+ }
}
}
@@ -7930,30 +7637,6 @@ private:
OpenMPOffloadMappingFlags::OMP_MAP_FROM;
}
- static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
- // Rotate by getFlagMemberOffset() bits.
- return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
- << getFlagMemberOffset());
- }
-
- static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
- OpenMPOffloadMappingFlags MemberOfFlag) {
- // If the entry is PTR_AND_OBJ but has not been marked with the special
- // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
- // marked as MEMBER_OF.
- if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
- Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
- static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
- (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
- OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
- return;
-
- // Reset the placeholder value to prepare the flag for the assignment of the
- // proper MEMBER_OF value.
- Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
- Flags |= MemberOfFlag;
- }
-
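Both helpers move to llvm::OpenMPIRBuilder unchanged. A minimal sketch of the
encoding they implement, assuming the usual 48-bit MEMBER_OF field offset:

    #include <cstdint>

    // The stored position is biased by one, so combined-entry index 0
    // encodes as 0x0001000000000000 in the upper 16 bits of the flags.
    uint64_t memberOfFlag(unsigned Position) {
      return ((uint64_t)Position + 1) << 48;
    }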
void getPlainLayout(const CXXRecordDecl *RD,
llvm::SmallVectorImpl<const FieldDecl *> &Layout,
bool AsBase) const {
@@ -8021,6 +7704,7 @@ private:
/// the device pointers info array.
void generateAllInfoForClauses(
ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
+ llvm::OpenMPIRBuilder &OMPBuilder,
const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
// We have to process the component lists that relate with the same
@@ -8355,7 +8039,7 @@ private:
if (PartialStruct.Base.isValid()) {
CurInfo.NonContigInfo.Dims.push_back(0);
emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
- /*IsMapThis*/ !VD, VD);
+ /*IsMapThis*/ !VD, OMPBuilder, VD);
}
// We need to append the results of this capture to what we already
@@ -8422,6 +8106,7 @@ public:
void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
MapFlagsArrayTy &CurTypes,
const StructRangeInfoTy &PartialStruct, bool IsMapThis,
+ llvm::OpenMPIRBuilder &OMPBuilder,
const ValueDecl *VD = nullptr,
bool NotTargetParams = true) const {
if (CurTypes.size() == 1 &&
@@ -8456,7 +8141,7 @@ public:
// of tofrom.
// Emit this[:1]
CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
- QualType Ty = MD->getThisType()->getPointeeType();
+ QualType Ty = MD->getFunctionObjectParameterType();
llvm::Value *Size =
CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
/*isSigned=*/true);
@@ -8509,9 +8194,9 @@ public:
// (except for PTR_AND_OBJ entries which do not have a placeholder value
// 0xFFFF in the MEMBER_OF field).
OpenMPOffloadMappingFlags MemberOfFlag =
- getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
+ OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
for (auto &M : CurTypes)
- setCorrectMemberOfFlag(M, MemberOfFlag);
+ OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
}
/// Generate all the base pointers, section pointers, sizes, map types, and
@@ -8520,23 +8205,26 @@ public:
/// pair of the relevant declaration and index where it occurs is appended to
/// the device pointers info array.
void generateAllInfo(
- MapCombinedInfoTy &CombinedInfo,
+ MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
- generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
+ generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
+ SkipVarSet);
}
/// Generate all the base pointers, section pointers, sizes, map types, and
/// mappers for the extracted map clauses of user-defined mapper (all included
/// in \a CombinedInfo).
- void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
+ void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
+ llvm::OpenMPIRBuilder &OMPBuilder) const {
assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
"Expect a declare mapper directive");
const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
- generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
+ generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
+ OMPBuilder);
}
/// Emit capture info for lambdas for variables captured by reference.
@@ -8618,6 +8306,7 @@ public:
/// Set correct indices for lambdas captures.
void adjustMemberOfForLambdaCaptures(
+ llvm::OpenMPIRBuilder &OMPBuilder,
const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
MapFlagsArrayTy &Types) const {
@@ -8642,8 +8331,9 @@ public:
// All other current entries will be MEMBER_OF the combined entry
// (except for PTR_AND_OBJ entries which do not have a placeholder value
// 0xFFFF in the MEMBER_OF field).
- OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
- setCorrectMemberOfFlag(Types[I], MemberOfFlag);
+ OpenMPOffloadMappingFlags MemberOfFlag =
+ OMPBuilder.getMemberOfFlag(TgtIdx);
+ OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
}
}
@@ -9242,17 +8932,17 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
// Prepare mapper function arguments and attributes.
ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
FunctionArgList Args;
Args.push_back(&HandleArg);
Args.push_back(&BaseArg);
@@ -9265,7 +8955,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
SmallString<64> TyStr;
llvm::raw_svector_ostream Out(TyStr);
- CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
+ CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
std::string Name = getName({"omp_mapper", TyStr, D->getName()});
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
Name, &CGM.getModule());
@@ -9337,7 +9027,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
// Get map clause information. Fill up the arrays with all mapped variables.
MappableExprsHandler::MapCombinedInfoTy Info;
MappableExprsHandler MEHandler(*D, MapperCGF);
- MEHandler.generateAllInfoForMapper(Info);
+ MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
// Call the runtime API __tgt_mapper_num_components to get the number of
// pre-existing components.
@@ -9721,7 +9411,8 @@ static void emitTargetCallKernelLaunch(
CombinedInfo.append(PartialStruct.PreliminaryMapData);
MEHandler.emitCombinedEntry(
CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
- nullptr, !PartialStruct.PreliminaryMapData.BasePointers.empty());
+ OMPBuilder, nullptr,
+ !PartialStruct.PreliminaryMapData.BasePointers.empty());
}
// We need to append the results of this capture to what we already have.
@@ -9729,11 +9420,11 @@ static void emitTargetCallKernelLaunch(
}
// Adjust MEMBER_OF flags for the lambdas captures.
MEHandler.adjustMemberOfForLambdaCaptures(
- LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
- CombinedInfo.Types);
+ OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
+ CombinedInfo.Pointers, CombinedInfo.Types);
// Map any list items in a map clause that were not captures because they
// weren't referenced within the construct.
- MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
+ MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
CGOpenMPRuntime::TargetDataInfo Info;
// Fill up the arrays and create the arguments.
@@ -9858,9 +9549,13 @@ void CGOpenMPRuntime::emitTargetCall(
assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
- const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
- D.hasClausesOfKind<OMPNowaitClause>() ||
- D.hasClausesOfKind<OMPInReductionClause>();
+ const bool RequiresOuterTask =
+ D.hasClausesOfKind<OMPDependClause>() ||
+ D.hasClausesOfKind<OMPNowaitClause>() ||
+ D.hasClausesOfKind<OMPInReductionClause>() ||
+ (CGM.getLangOpts().OpenMP >= 51 &&
+ needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
+ D.hasClausesOfKind<OMPThreadLimitClause>());
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
@@ -10151,6 +9846,13 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
+
+  // If this is an 'extern' declaration, we defer to the canonical definition
+  // and do not emit an offloading entry.
+ if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
+ VD->hasExternalStorage())
+ return;
+
if (!Res) {
if (CGM.getLangOpts().OpenMPIsTargetDevice) {
// Register non-target variables being emitted in device code (debug info
@@ -10163,7 +9865,7 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
auto LinkageForVariable = [&VD, this]() {
- return CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
+ return CGM.getLLVMLinkageVarDefinition(VD);
};
std::vector<llvm::GlobalVariable *> GeneratedRefs;
@@ -10181,8 +9883,6 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
for (auto *ref : GeneratedRefs)
CGM.addCompilerUsedGlobal(ref);
-
- return;
}
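A sketch of the new 'extern' behaviour (hypothetical declarations):

    #pragma omp declare target
    extern int DevGlobal;   // this TU emits no offloading entry for DevGlobal
    #pragma omp end declare target

    // The TU that provides 'int DevGlobal = ...;' emits the canonical entry.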
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
@@ -10331,7 +10031,6 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
std::string ReqName = getName({"omp_offloading", "requires_reg"});
RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
- OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
// TODO: check for other requires clauses.
// The requires directive takes effect only when a target region is
// present in the compilation unit. Otherwise it is ignored and not
@@ -10341,11 +10040,10 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
!OMPBuilder.OffloadInfoManager.empty()) &&
"Target or declare target region expected.");
- if (HasRequiresUnifiedSharedMemory)
- Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___tgt_register_requires),
- llvm::ConstantInt::get(CGM.Int64Ty, Flags));
+ llvm::ConstantInt::get(
+ CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags()));
CGF.FinishFunction();
}
return RequiresRegFn;
@@ -10405,6 +10103,24 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
PushNumTeamsArgs);
}
+void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
+ const Expr *ThreadLimit,
+ SourceLocation Loc) {
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadLimitVal =
+ ThreadLimit
+ ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
+ CGF.CGM.Int32Ty, /* isSigned = */ true)
+ : CGF.Builder.getInt32(0);
+
+ // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
+ llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
+ ThreadLimitVal};
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
+ ThreadLimitArgs);
+}
+
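For a target directive carrying thread_limit(N) under OpenMP 5.1, the helper
above emits, roughly:

    %tl = N cast to i32                     ; isSigned = true above
    call void @__kmpc_set_thread_limit(%loc, %gtid, i32 %tl)

with a literal i32 0 passed when no thread_limit expression is present.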
void CGOpenMPRuntime::emitTargetDataCalls(
CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
const Expr *Device, const RegionCodeGenTy &CodeGen,
@@ -10417,11 +10133,6 @@ void CGOpenMPRuntime::emitTargetDataCalls(
PrePostActionTy NoPrivAction;
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
- InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
- CGF.AllocaInsertPt->getIterator());
- InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
- CGF.Builder.GetInsertPoint());
- llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
llvm::Value *IfCondVal = nullptr;
if (IfCond)
@@ -10443,7 +10154,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(
CGF.Builder.restoreIP(CodeGenIP);
// Get map clause information.
MappableExprsHandler MEHandler(D, CGF);
- MEHandler.generateAllInfo(CombinedInfo);
+ MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
return emitMappingInformation(CGF, OMPBuilder, MapExpr);
@@ -10501,6 +10212,11 @@ void CGOpenMPRuntime::emitTargetDataCalls(
// Source location for the ident struct
llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
+ InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
+ CGF.AllocaInsertPt->getIterator());
+ InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
+ CGF.Builder.GetInsertPoint());
+ llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
CGF.Builder.restoreIP(OMPBuilder.createTargetData(
OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
/*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
@@ -10649,7 +10365,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
// Get map clause information.
MappableExprsHandler MEHandler(D, CGF);
- MEHandler.generateAllInfo(CombinedInfo);
+ MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
CGOpenMPRuntime::TargetDataInfo Info;
// Fill up the arrays and create the arguments.
@@ -11327,8 +11043,8 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
}
llvm::APInt Size(/*numBits=*/32, NumIterations.size());
- QualType ArrayTy =
- C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
+ QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
+ ArraySizeModifier::Normal, 0);
Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
CGF.EmitNullInitialization(DimsAddr, ArrayTy);
@@ -11380,7 +11096,7 @@ static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
QualType ArrayTy = CGM.getContext().getConstantArrayType(
- Int64Ty, Size, nullptr, ArrayType::Normal, 0);
+ Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
const Expr *CounterVal = C->getLoopData(I);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 2ee2a39ba538..b01b39abd160 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -311,6 +311,14 @@ protected:
/// An OpenMP-IR-Builder instance.
llvm::OpenMPIRBuilder OMPBuilder;
+ /// Helper to determine the min/max number of threads/teams for \p D.
+ void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D,
+ CodeGenFunction &CGF,
+ int32_t &MinThreadsVal,
+ int32_t &MaxThreadsVal,
+ int32_t &MinTeamsVal,
+ int32_t &MaxTeamsVal);
+
/// Helper to emit outlined function for 'target' directive.
/// \param D Directive to emit.
/// \param ParentName Name of the function that encloses the target region.
@@ -527,28 +535,6 @@ protected:
/// Returns pointer to kmpc_micro type.
llvm::Type *getKmpc_MicroPointerTy();
- /// Returns __kmpc_for_static_init_* runtime function for the specified
- /// size \a IVSize and sign \a IVSigned. Will create a distribute call
- /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
- llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize,
- bool IVSigned,
- bool IsGPUDistribute);
-
- /// Returns __kmpc_dispatch_init_* runtime function for the specified
- /// size \a IVSize and sign \a IVSigned.
- llvm::FunctionCallee createDispatchInitFunction(unsigned IVSize,
- bool IVSigned);
-
- /// Returns __kmpc_dispatch_next_* runtime function for the specified
- /// size \a IVSize and sign \a IVSigned.
- llvm::FunctionCallee createDispatchNextFunction(unsigned IVSize,
- bool IVSigned);
-
- /// Returns __kmpc_dispatch_fini_* runtime function for the specified
- /// size \a IVSize and sign \a IVSigned.
- llvm::FunctionCallee createDispatchFiniFunction(unsigned IVSize,
- bool IVSigned);
-
/// If the specified mangled name is not in the module, create and
/// return threadprivate cache object. This object is a pointer's worth of
/// storage that's reserved for use by the OpenMP runtime.
@@ -659,21 +645,23 @@ public:
/// Otherwise, return nullptr.
const Expr *getNumTeamsExprForTargetDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
- int32_t &DefaultVal);
+ int32_t &MinTeamsVal,
+ int32_t &MaxTeamsVal);
llvm::Value *emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &D);
- /// Emit the number of threads for a target directive. Inspect the
- /// thread_limit clause associated with a teams construct combined or closely
- /// nested with the target directive.
- ///
- /// Emit the num_threads clause for directives such as 'target parallel' that
- /// have no associated teams construct.
- ///
- /// Otherwise, return nullptr.
- const Expr *
- getNumThreadsExprForTargetDirective(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- int32_t &DefaultVal);
+
+  /// Check for a constant upper bound on the number of threads (stored in \p
+  /// UpperBound), or an expression (returned). If the value is conditional (via
+  /// an if-clause), store the condition in \p CondExpr. Similarly, a potential
+  /// thread limit expression is stored in \p ThreadLimitExpr. If \p
+  /// UpperBoundOnly is true, no expression evaluation is performed.
+ const Expr *getNumThreadsExprForTargetDirective(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ int32_t &UpperBound, bool UpperBoundOnly,
+ llvm::Value **CondExpr = nullptr, const Expr **ThreadLimitExpr = nullptr);
+
+ /// Emit an expression that denotes the number of threads a target region
+ /// shall use. Will generate "i32 0" to allow the runtime to choose.
llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &D);
@@ -1101,13 +1089,12 @@ public:
SourceLocation Loc, bool PerformInit,
CodeGenFunction *CGF = nullptr);
- /// Emit a code for initialization of declare target variable.
- /// \param VD Declare target variable.
- /// \param Addr Address of the global variable \a VD.
+ /// Emit code for handling declare target functions in the runtime.
+ /// \param FD Declare target function.
+  /// \param GV Global value the function \a FD was emitted as.
- virtual bool emitDeclareTargetVarDefinition(const VarDecl *VD,
- llvm::GlobalVariable *Addr,
- bool PerformInit);
+ virtual void emitDeclareTargetFunction(const FunctionDecl *FD,
+ llvm::GlobalValue *GV);
/// Creates artificial threadprivate variable with name \p Name and type \p
/// VarType.
@@ -1449,6 +1436,14 @@ public:
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
const Expr *ThreadLimit, SourceLocation Loc);
+  /// Emits a call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32
+  /// global_tid, kmp_int32 thread_limit) to generate code for the
+  /// thread_limit clause on a target directive.
+  /// \param ThreadLimit An integer expression giving the thread limit.
+ virtual void emitThreadLimitClause(CodeGenFunction &CGF,
+ const Expr *ThreadLimit,
+ SourceLocation Loc);
+
/// Struct that keeps all the relevant information that should be kept
/// throughout a 'target data' region.
class TargetDataInfo : public llvm::OpenMPIRBuilder::TargetDataInfo {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 62aacb9e24d6..293ccaa3413c 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -85,18 +85,6 @@ public:
~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }
};
-/// GPU Configuration: This information can be derived from cuda registers,
-/// however, providing compile time constants helps generate more efficient
-/// code. For all practical purposes this is fine because the configuration
-/// is the same for all known NVPTX architectures.
-enum MachineConfiguration : unsigned {
- /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
- /// specific Grid Values like GV_Warp_Size, GV_Slot_Size
-
- /// Global memory alignment for performance.
- GlobalMemoryAlignment = 128,
-};
-
static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
RefExpr = RefExpr->IgnoreParens();
if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
@@ -119,31 +107,23 @@ static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
}
-
static RecordDecl *buildRecordForGlobalizedVars(
ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls,
ArrayRef<const ValueDecl *> EscapedDeclsForTeams,
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
- &MappedDeclsFields, int BufSize) {
+ &MappedDeclsFields,
+ int BufSize) {
using VarsDataTy = std::pair<CharUnits /*Align*/, const ValueDecl *>;
if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
return nullptr;
SmallVector<VarsDataTy, 4> GlobalizedVars;
for (const ValueDecl *D : EscapedDecls)
- GlobalizedVars.emplace_back(
- CharUnits::fromQuantity(std::max(
- C.getDeclAlign(D).getQuantity(),
- static_cast<CharUnits::QuantityType>(GlobalMemoryAlignment))),
- D);
+ GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
for (const ValueDecl *D : EscapedDeclsForTeams)
GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
- llvm::stable_sort(GlobalizedVars, [](VarsDataTy L, VarsDataTy R) {
- return L.first > R.first;
- });
// Build struct _globalized_locals_ty {
- // /* globalized vars */[WarSize] align (max(decl_align,
- // GlobalMemoryAlignment))
+  //  /* globalized vars */[WarpSize] align (decl_align)
// /* globalized vars */ for EscapedDeclsForTeams
// };
RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
@@ -173,18 +153,18 @@ static RecordDecl *buildRecordForGlobalizedVars(
Field->addAttr(*I);
}
} else {
- llvm::APInt ArraySize(32, BufSize);
- Type = C.getConstantArrayType(Type, ArraySize, nullptr, ArrayType::Normal,
- 0);
+ if (BufSize > 1) {
+ llvm::APInt ArraySize(32, BufSize);
+ Type = C.getConstantArrayType(Type, ArraySize, nullptr,
+ ArraySizeModifier::Normal, 0);
+ }
Field = FieldDecl::Create(
C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
C.getTrivialTypeSourceInfo(Type, SourceLocation()),
/*BW=*/nullptr, /*Mutable=*/false,
/*InitStyle=*/ICIS_NoInit);
Field->setAccess(AS_public);
- llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(),
- static_cast<CharUnits::QuantityType>(
- GlobalMemoryAlignment)));
+ llvm::APInt Align(32, Pair.first.getQuantity());
Field->addAttr(AlignedAttr::CreateImplicit(
C, /*IsAlignmentExpr=*/true,
IntegerLiteral::Create(C, Align,
@@ -551,10 +531,9 @@ CGOpenMPRuntimeGPU::getExecutionMode() const {
return CurrentExecutionMode;
}
-static CGOpenMPRuntimeGPU::DataSharingMode
-getDataSharingMode(CodeGenModule &CGM) {
- return CGM.getLangOpts().OpenMPCUDAMode ? CGOpenMPRuntimeGPU::CUDA
- : CGOpenMPRuntimeGPU::Generic;
+CGOpenMPRuntimeGPU::DataSharingMode
+CGOpenMPRuntimeGPU::getDataSharingMode() const {
+ return CurrentDataSharingMode;
}
/// Check for inner (nested) SPMD construct, if any
@@ -752,27 +731,30 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
EntryFunctionState EST;
WrapperFunctionsMap.clear();
+ [[maybe_unused]] bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
+  assert(!IsBareKernel && "bare kernel should not be in generic mode");
+
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
CGOpenMPRuntimeGPU::EntryFunctionState &EST;
+ const OMPExecutableDirective &D;
public:
- NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST)
- : EST(EST) {}
+ NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST,
+ const OMPExecutableDirective &D)
+ : EST(EST), D(D) {}
void Enter(CodeGenFunction &CGF) override {
- auto &RT =
- static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
- RT.emitKernelInit(CGF, EST, /* IsSPMD */ false);
+ auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+ RT.emitKernelInit(D, CGF, EST, /* IsSPMD */ false);
// Skip target region initialization.
RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
- auto &RT =
- static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+ auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
RT.clearLocThreadIdInsertPt(CGF);
RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false);
}
- } Action(EST);
+ } Action(EST, D);
CodeGen.setAction(Action);
IsInTTDRegion = true;
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
@@ -780,10 +762,17 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
IsInTTDRegion = false;
}
-void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF,
+void CGOpenMPRuntimeGPU::emitKernelInit(const OMPExecutableDirective &D,
+ CodeGenFunction &CGF,
EntryFunctionState &EST, bool IsSPMD) {
+ int32_t MinThreadsVal = 1, MaxThreadsVal = -1, MinTeamsVal = 1,
+ MaxTeamsVal = -1;
+ computeMinAndMaxThreadsAndTeams(D, CGF, MinThreadsVal, MaxThreadsVal,
+ MinTeamsVal, MaxTeamsVal);
+
CGBuilderTy &Bld = CGF.Builder;
- Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD));
+ Bld.restoreIP(OMPBuilder.createTargetInit(
+ Bld, IsSPMD, MinThreadsVal, MaxThreadsVal, MinTeamsVal, MaxTeamsVal));
if (!IsSPMD)
emitGenericVarsProlog(CGF, EST.Loc);
}
@@ -794,8 +783,34 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
if (!IsSPMD)
emitGenericVarsEpilog(CGF);
+  // This is temporary until we remove the fixed-size buffer.
+ ASTContext &C = CGM.getContext();
+ RecordDecl *StaticRD = C.buildImplicitRecord(
+ "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::Union);
+ StaticRD->startDefinition();
+ for (const RecordDecl *TeamReductionRec : TeamsReductions) {
+ QualType RecTy = C.getRecordType(TeamReductionRec);
+ auto *Field = FieldDecl::Create(
+ C, StaticRD, SourceLocation(), SourceLocation(), nullptr, RecTy,
+ C.getTrivialTypeSourceInfo(RecTy, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ StaticRD->addDecl(Field);
+ }
+ StaticRD->completeDefinition();
+ QualType StaticTy = C.getRecordType(StaticRD);
+ llvm::Type *LLVMReductionsBufferTy =
+ CGM.getTypes().ConvertTypeForMem(StaticTy);
+ const auto &DL = CGM.getModule().getDataLayout();
+ uint64_t ReductionDataSize =
+ TeamsReductions.empty()
+ ? 0
+ : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();
CGBuilderTy &Bld = CGF.Builder;
- OMPBuilder.createTargetDeinit(Bld, IsSPMD);
+ OMPBuilder.createTargetDeinit(Bld, ReductionDataSize,
+ C.getLangOpts().OpenMPCUDAReductionBufNum);
+ TeamsReductions.clear();
}
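A sketch of the implicit record built in the deinit path above, assuming two
team reductions over 'double' and 'int' (hypothetical field names):

    union _openmp_teams_reduction_type_$_ { double r0; int r1; };

ReductionDataSize is the data-layout alloc size of this union (8 bytes here),
and it is handed to createTargetDeinit along with OpenMPCUDAReductionBufNum.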
void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
@@ -807,25 +822,40 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);
EntryFunctionState EST;
+ bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
+
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
CGOpenMPRuntimeGPU &RT;
CGOpenMPRuntimeGPU::EntryFunctionState &EST;
+ bool IsBareKernel;
+ DataSharingMode Mode;
+ const OMPExecutableDirective &D;
public:
NVPTXPrePostActionTy(CGOpenMPRuntimeGPU &RT,
- CGOpenMPRuntimeGPU::EntryFunctionState &EST)
- : RT(RT), EST(EST) {}
+ CGOpenMPRuntimeGPU::EntryFunctionState &EST,
+ bool IsBareKernel, const OMPExecutableDirective &D)
+ : RT(RT), EST(EST), IsBareKernel(IsBareKernel),
+ Mode(RT.CurrentDataSharingMode), D(D) {}
void Enter(CodeGenFunction &CGF) override {
- RT.emitKernelInit(CGF, EST, /* IsSPMD */ true);
+ if (IsBareKernel) {
+ RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;
+ return;
+ }
+ RT.emitKernelInit(D, CGF, EST, /* IsSPMD */ true);
// Skip target region initialization.
RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
+ if (IsBareKernel) {
+ RT.CurrentDataSharingMode = Mode;
+ return;
+ }
RT.clearLocThreadIdInsertPt(CGF);
RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true);
}
- } Action(*this, EST);
+ } Action(*this, EST, IsBareKernel, D);
CodeGen.setAction(Action);
IsInTTDRegion = true;
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
@@ -833,24 +863,6 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
IsInTTDRegion = false;
}
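
The IsBareKernel path above is driven by clang's ompx_bare extension clause (the OMPXBareClause checked earlier); a hedged example of a kernel that would take it:

// Sketch only: ompx_bare routes through emitSPMDKernel but skips
// emitKernelInit/Deinit, so no __kmpc_target_init/_deinit calls are emitted
// and CUDA-style data sharing is used for the body.
void bare_fill(int *a, int n) {
  #pragma omp target teams ompx_bare num_teams(4) thread_limit(64) \
      map(from: a[0:n])
  {
    for (int i = 0; i < n; ++i)
      a[i] = i;
  }
}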
-// Create a unique global variable to indicate the execution mode of this target
-// region. The execution mode is either 'generic', or 'spmd' depending on the
-// target directive. This variable is picked up by the offload library to setup
-// the device appropriately before kernel launch. If the execution mode is
-// 'generic', the runtime reserves one warp for the master, otherwise, all
-// warps participate in parallel work.
-static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
- bool Mode) {
- auto *GVMode = new llvm::GlobalVariable(
- CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::WeakAnyLinkage,
- llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD
- : OMP_TGT_EXEC_MODE_GENERIC),
- Twine(Name, "_exec_mode"));
- GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility);
- CGM.addCompilerUsedGlobal(GVMode);
-}
-
void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
@@ -861,26 +873,30 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
assert(!ParentName.empty() && "Invalid target region parent name!");
bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
- if (Mode)
+ bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
+ if (Mode || IsBareKernel)
emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
CodeGen);
else
emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
CodeGen);
-
- setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}
CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM) {
- llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsTargetDevice,
- isGPU(), hasRequiresUnifiedSharedMemory(),
- CGM.getLangOpts().OpenMPOffloadMandatory);
+ llvm::OpenMPIRBuilderConfig Config(
+ CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
+ CGM.getLangOpts().OpenMPOffloadMandatory,
+ /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
+ hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
OMPBuilder.setConfig(Config);
if (!CGM.getLangOpts().OpenMPIsTargetDevice)
llvm_unreachable("OpenMP can only handle device code.");
+ if (CGM.getLangOpts().OpenMPCUDAMode)
+ CurrentDataSharingMode = CGOpenMPRuntimeGPU::DS_CUDA;
+
llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder();
if (CGM.getLangOpts().NoGPULib || CGM.getLangOpts().OMPHostIRFile.empty())
return;
@@ -900,11 +916,7 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,
ProcBindKind ProcBind,
SourceLocation Loc) {
- // Do nothing in case of SPMD mode and L0 parallel.
- if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD)
- return;
-
- CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
+ // Nothing to do.
}
void CGOpenMPRuntimeGPU::emitNumThreadsClause(CodeGenFunction &CGF,
@@ -1046,10 +1058,8 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
}
void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
- SourceLocation Loc,
- bool WithSPMDCheck) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
- getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
+ SourceLocation Loc) {
+ if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
return;
CGBuilderTy &Bld = CGF.Builder;
@@ -1158,10 +1168,8 @@ void CGOpenMPRuntimeGPU::getKmpcFreeShared(
{AddrSizePair.first, AddrSizePair.second});
}
-void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
- bool WithSPMDCheck) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
- getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
+void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF) {
+ if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
return;
const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
@@ -1196,11 +1204,18 @@ void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;
+ bool IsBareKernel = D.getSingleClause<OMPXBareClause>();
+
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
/*Name=*/".zero.addr");
CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
+ // We don't emit any thread id function call in a bare kernel, but because the
+ // outlined function has a pointer argument, we emit a nullptr here.
+ if (IsBareKernel)
+ OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
+ else
+ OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -1405,9 +1420,7 @@ static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val,
return CGF.Builder.CreateIntCast(Val, LLVMCastTy,
CastTy->hasSignedIntegerRepresentation());
Address CastItem = CGF.CreateMemTemp(CastTy);
- Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace()),
- Val->getType());
+ Address ValCastItem = CastItem.withElementType(Val->getType());
CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy,
LValueBaseInfo(AlignmentSource::Type),
TBAAAccessInfo());
@@ -1543,11 +1556,6 @@ enum CopyAction : unsigned {
RemoteLaneToThread,
// ThreadCopy: Make a copy of a Reduce list on the thread's stack.
ThreadCopy,
- // ThreadToScratchpad: Copy a team-reduced array to the scratchpad.
- ThreadToScratchpad,
- // ScratchpadToThread: Copy from a scratchpad array in global memory
- // containing team-reduced data to a thread's stack.
- ScratchpadToThread,
};
} // namespace
@@ -1569,13 +1577,10 @@ static void emitReductionListCopy(
CGBuilderTy &Bld = CGF.Builder;
llvm::Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
- llvm::Value *ScratchpadIndex = CopyOptions.ScratchpadIndex;
- llvm::Value *ScratchpadWidth = CopyOptions.ScratchpadWidth;
// Iterates, element-by-element, through the source Reduce list and
// make a copy.
unsigned Idx = 0;
- unsigned Size = Privates.size();
for (const Expr *Private : Privates) {
Address SrcElementAddr = Address::invalid();
Address DestElementAddr = Address::invalid();
@@ -1585,10 +1590,6 @@ static void emitReductionListCopy(
// Set to true to update the pointer in the dest Reduce list to a
// newly created element.
bool UpdateDestListPtr = false;
- // Increment the src or dest pointer to the scratchpad, for each
- // new element.
- bool IncrScratchpadSrc = false;
- bool IncrScratchpadDest = false;
QualType PrivatePtrType = C.getPointerType(Private->getType());
llvm::Type *PrivateLlvmPtrType = CGF.ConvertType(PrivatePtrType);
@@ -1624,49 +1625,6 @@ static void emitReductionListCopy(
PrivatePtrType->castAs<PointerType>());
break;
}
- case ThreadToScratchpad: {
- // Step 1.1: Get the address for the src element in the Reduce list.
- Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx);
- SrcElementAddr = CGF.EmitLoadOfPointer(
- SrcElementPtrAddr.withElementType(PrivateLlvmPtrType),
- PrivatePtrType->castAs<PointerType>());
-
- // Step 1.2: Get the address for dest element:
- // address = base + index * ElementSizeInChars.
- llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
- llvm::Value *CurrentOffset =
- Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
- llvm::Value *ScratchPadElemAbsolutePtrVal =
- Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset);
- ScratchPadElemAbsolutePtrVal =
- Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
- DestElementAddr = Address(ScratchPadElemAbsolutePtrVal, CGF.Int8Ty,
- C.getTypeAlignInChars(Private->getType()));
- IncrScratchpadDest = true;
- break;
- }
- case ScratchpadToThread: {
- // Step 1.1: Get the address for the src element in the scratchpad.
- // address = base + index * ElementSizeInChars.
- llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
- llvm::Value *CurrentOffset =
- Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
- llvm::Value *ScratchPadElemAbsolutePtrVal =
- Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset);
- ScratchPadElemAbsolutePtrVal =
- Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
- SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal, CGF.Int8Ty,
- C.getTypeAlignInChars(Private->getType()));
- IncrScratchpadSrc = true;
-
- // Step 1.2: Create a temporary to store the element in the destination
- // Reduce list.
- DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx);
- DestElementAddr =
- CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
- UpdateDestListPtr = true;
- break;
- }
}
// Regardless of src and dest of copy, we emit the load of src
@@ -1724,39 +1682,6 @@ static void emitReductionListCopy(
C.VoidPtrTy);
}
- // Step 4.1: Increment SrcBase/DestBase so that it points to the starting
- // address of the next element in scratchpad memory, unless we're currently
- // processing the last one. Memory alignment is also taken care of here.
- if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
- // FIXME: This code doesn't make any sense, it's trying to perform
- // integer arithmetic on pointers.
- llvm::Value *ScratchpadBasePtr =
- IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer();
- llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
- ScratchpadBasePtr = Bld.CreateNUWAdd(
- ScratchpadBasePtr,
- Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars));
-
- // Take care of global memory alignment for performance
- ScratchpadBasePtr = Bld.CreateNUWSub(
- ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
- ScratchpadBasePtr = Bld.CreateUDiv(
- ScratchpadBasePtr,
- llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
- ScratchpadBasePtr = Bld.CreateNUWAdd(
- ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
- ScratchpadBasePtr = Bld.CreateNUWMul(
- ScratchpadBasePtr,
- llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
-
- if (IncrScratchpadDest)
- DestBase =
- Address(ScratchpadBasePtr, CGF.VoidPtrTy, CGF.getPointerAlign());
- else /* IncrScratchpadSrc = true */
- SrcBase =
- Address(ScratchpadBasePtr, CGF.VoidPtrTy, CGF.getPointerAlign());
- }
-
++Idx;
}
}
@@ -1784,12 +1709,12 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
// At the stage of the computation when this function is called, partially
// aggregated values reside in the first lane of every active warp.
ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
// NumWarps: number of warps active in the parallel region. This could
// be smaller than 32 (max warps in a CTA) for partial block reduction.
ImplicitParamDecl NumWarpsArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getIntTypeForBitwidth(32, /* Signed */ true),
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
FunctionArgList Args;
Args.push_back(&ReduceListArg);
Args.push_back(&NumWarpsArg);
@@ -1914,12 +1839,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
{llvm::Constant::getNullValue(CGM.Int64Ty), WarpID});
// Casting to actual data type.
// MediumPtr = (CopyType*)MediumPtrAddr;
- Address MediumPtr(
- Bld.CreateBitCast(
- MediumPtrVal,
- CopyType->getPointerTo(
- MediumPtrVal->getType()->getPointerAddressSpace())),
- CopyType, Align);
+ Address MediumPtr(MediumPtrVal, CopyType, Align);
// elem = *elemptr
//*MediumPtr = elem
@@ -1966,12 +1886,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
TransferMedium->getValueType(), TransferMedium,
{llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID});
// SrcMediumVal = *SrcMediumPtr;
- Address SrcMediumPtr(
- Bld.CreateBitCast(
- SrcMediumPtrVal,
- CopyType->getPointerTo(
- SrcMediumPtrVal->getType()->getPointerAddressSpace())),
- CopyType, Align);
+ Address SrcMediumPtr(SrcMediumPtrVal, CopyType, Align);
// TargetElemPtr = (CopyType*)(SrcDataAddr[i]) + I
Address TargetElemPtrPtr = Bld.CreateConstArrayGEP(LocalReduceList, Idx);
@@ -2082,16 +1997,16 @@ static llvm::Function *emitShuffleAndReduceFunction(
// Thread local Reduce list used to host the values of data to be reduced.
ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
// Current lane id; could be logical.
ImplicitParamDecl LaneIDArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.ShortTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
// Offset of the remote source lane relative to the current lane.
ImplicitParamDecl RemoteLaneOffsetArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.ShortTy, ImplicitParamDecl::Other);
+ C.ShortTy, ImplicitParamKind::Other);
// Algorithm version. This is expected to be known at compile time.
ImplicitParamDecl AlgoVerArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.ShortTy, ImplicitParamDecl::Other);
+ C.ShortTy, ImplicitParamKind::Other);
FunctionArgList Args;
Args.push_back(&ReduceListArg);
Args.push_back(&LaneIDArg);
@@ -2243,13 +2158,13 @@ static llvm::Value *emitListToGlobalCopyFunction(
// Buffer: global reduction buffer.
ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
// Idx: index of the buffer.
ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
// ReduceList: thread local Reduce list.
ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
FunctionArgList Args;
Args.push_back(&BufferArg);
Args.push_back(&IdxArg);
@@ -2282,8 +2197,7 @@ static llvm::Value *emitListToGlobalCopyFunction(
llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
LLVMReductionsBufferTy->getPointerTo());
- llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
- CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
+ llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
/*Volatile=*/false, C.IntTy,
Loc)};
unsigned Idx = 0;
@@ -2301,12 +2215,12 @@ static llvm::Value *emitListToGlobalCopyFunction(
const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
// Global = Buffer.VD[Idx];
const FieldDecl *FD = VarFieldMap.lookup(VD);
+ llvm::Value *BufferPtr =
+ Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
LValue GlobLVal = CGF.EmitLValueForField(
- CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
+ CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
Address GlobAddr = GlobLVal.getAddress(CGF);
- llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(),
- GlobAddr.getPointer(), Idxs);
- GlobLVal.setAddress(Address(BufferPtr,
+ GlobLVal.setAddress(Address(GlobAddr.getPointer(),
CGF.ConvertTypeForMem(Private->getType()),
GlobAddr.getAlignment()));
switch (CGF.getEvaluationKind(Private->getType())) {
@@ -2356,13 +2270,13 @@ static llvm::Value *emitListToGlobalReduceFunction(
// Buffer: global reduction buffer.
ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
// Idx: index of the buffer.
ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
// ReduceList: thread local Reduce list.
ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
FunctionArgList Args;
Args.push_back(&BufferArg);
Args.push_back(&IdxArg);
@@ -2393,8 +2307,7 @@ static llvm::Value *emitListToGlobalReduceFunction(
Address ReductionList =
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
auto IPriv = Privates.begin();
- llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
- CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
+ llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
/*Volatile=*/false, C.IntTy,
Loc)};
unsigned Idx = 0;
@@ -2403,12 +2316,13 @@ static llvm::Value *emitListToGlobalReduceFunction(
// Global = Buffer.VD[Idx];
const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
const FieldDecl *FD = VarFieldMap.lookup(VD);
+ llvm::Value *BufferPtr =
+ Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
LValue GlobLVal = CGF.EmitLValueForField(
- CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
+ CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
Address GlobAddr = GlobLVal.getAddress(CGF);
- llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(
- GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs);
- CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy);
+ CGF.EmitStoreOfScalar(GlobAddr.getPointer(), Elem, /*Volatile=*/false,
+ C.VoidPtrTy);
if ((*IPriv)->getType()->isVariablyModifiedType()) {
// Store array size.
++Idx;
@@ -2450,13 +2364,13 @@ static llvm::Value *emitGlobalToListCopyFunction(
// Buffer: global reduction buffer.
ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
// Idx: index of the buffer.
ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
// ReduceList: thread local Reduce list.
ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
FunctionArgList Args;
Args.push_back(&BufferArg);
Args.push_back(&IdxArg);
@@ -2490,8 +2404,7 @@ static llvm::Value *emitGlobalToListCopyFunction(
CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
LLVMReductionsBufferTy->getPointerTo());
- llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
- CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
+ llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
/*Volatile=*/false, C.IntTy,
Loc)};
unsigned Idx = 0;
@@ -2509,12 +2422,12 @@ static llvm::Value *emitGlobalToListCopyFunction(
const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
// Global = Buffer.VD[Idx];
const FieldDecl *FD = VarFieldMap.lookup(VD);
+ llvm::Value *BufferPtr =
+ Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
LValue GlobLVal = CGF.EmitLValueForField(
- CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
+ CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
Address GlobAddr = GlobLVal.getAddress(CGF);
- llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(),
- GlobAddr.getPointer(), Idxs);
- GlobLVal.setAddress(Address(BufferPtr,
+ GlobLVal.setAddress(Address(GlobAddr.getPointer(),
CGF.ConvertTypeForMem(Private->getType()),
GlobAddr.getAlignment()));
switch (CGF.getEvaluationKind(Private->getType())) {
@@ -2564,13 +2477,13 @@ static llvm::Value *emitGlobalToListReduceFunction(
// Buffer: global reduction buffer.
ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
// Idx: index of the buffer.
ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
// ReduceList: thread local Reduce list.
ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
+ C.VoidPtrTy, ImplicitParamKind::Other);
FunctionArgList Args;
Args.push_back(&BufferArg);
Args.push_back(&IdxArg);
@@ -2601,8 +2514,7 @@ static llvm::Value *emitGlobalToListReduceFunction(
Address ReductionList =
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
auto IPriv = Privates.begin();
- llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
- CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
+ llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
/*Volatile=*/false, C.IntTy,
Loc)};
unsigned Idx = 0;
@@ -2611,12 +2523,13 @@ static llvm::Value *emitGlobalToListReduceFunction(
// Global = Buffer.VD[Idx];
const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
const FieldDecl *FD = VarFieldMap.lookup(VD);
+ llvm::Value *BufferPtr =
+ Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
LValue GlobLVal = CGF.EmitLValueForField(
- CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
+ CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
Address GlobAddr = GlobLVal.getAddress(CGF);
- llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(
- GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs);
- CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy);
+ CGF.EmitStoreOfScalar(GlobAddr.getPointer(), Elem, /*Volatile=*/false,
+ C.VoidPtrTy);
if ((*IPriv)->getType()->isVariablyModifiedType()) {
// Store array size.
++Idx;
@@ -2907,15 +2820,25 @@ void CGOpenMPRuntimeGPU::emitReduction(
assert((TeamsReduction || ParallelReduction) &&
"Invalid reduction selection in emitReduction.");
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;
+ llvm::SmallVector<const ValueDecl *, 4> PrivatesReductions(Privates.size());
+ int Cnt = 0;
+ for (const Expr *DRE : Privates) {
+ PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl();
+ ++Cnt;
+ }
+
+ ASTContext &C = CGM.getContext();
+ const RecordDecl *ReductionRec = ::buildRecordForGlobalizedVars(
+ CGM.getContext(), PrivatesReductions, std::nullopt, VarFieldMap, 1);
+
// Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList),
// RedList, shuffle_reduce_func, interwarp_copy_func);
// or
// Build res = __kmpc_reduce_teams_nowait_simple(<loc>, <gtid>, <lck>);
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
- llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::Value *Res;
- ASTContext &C = CGM.getContext();
// 1. Build a list of reduction variables.
// void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
auto Size = RHSExprs.size();
@@ -2925,9 +2848,9 @@ void CGOpenMPRuntimeGPU::emitReduction(
++Size;
}
llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
- QualType ReductionArrayTy =
- C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ QualType ReductionArrayTy = C.getConstantArrayType(
+ C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
+ /*IndexTypeQuals=*/0);
Address ReductionList =
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
auto IPriv = Privates.begin();
@@ -2957,19 +2880,17 @@ void CGOpenMPRuntimeGPU::emitReduction(
llvm::Function *ReductionFn = emitReductionFunction(
CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
Privates, LHSExprs, RHSExprs, ReductionOps);
- llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
+ llvm::Value *ReductionDataSize =
+ CGF.getTypeSize(C.getRecordType(ReductionRec));
+ ReductionDataSize =
+ CGF.Builder.CreateSExtOrTrunc(ReductionDataSize, CGF.Int64Ty);
llvm::Function *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
llvm::Value *InterWarpCopyFn =
emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc);
if (ParallelReduction) {
- llvm::Value *Args[] = {RTLoc,
- ThreadId,
- CGF.Builder.getInt32(RHSExprs.size()),
- ReductionArrayTySize,
- RL,
- ShuffleAndReduceFn,
+ llvm::Value *Args[] = {RTLoc, ReductionDataSize, RL, ShuffleAndReduceFn,
InterWarpCopyFn};
Res = CGF.EmitRuntimeCall(
@@ -2978,42 +2899,27 @@ void CGOpenMPRuntimeGPU::emitReduction(
Args);
} else {
assert(TeamsReduction && "expected teams reduction.");
- llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;
- llvm::SmallVector<const ValueDecl *, 4> PrivatesReductions(Privates.size());
- int Cnt = 0;
- for (const Expr *DRE : Privates) {
- PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl();
- ++Cnt;
- }
- const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars(
- CGM.getContext(), PrivatesReductions, std::nullopt, VarFieldMap,
- C.getLangOpts().OpenMPCUDAReductionBufNum);
- TeamsReductions.push_back(TeamReductionRec);
- if (!KernelTeamsReductionPtr) {
- KernelTeamsReductionPtr = new llvm::GlobalVariable(
- CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/true,
- llvm::GlobalValue::InternalLinkage, nullptr,
- "_openmp_teams_reductions_buffer_$_$ptr");
- }
- llvm::Value *GlobalBufferPtr = CGF.EmitLoadOfScalar(
- Address(KernelTeamsReductionPtr, CGF.VoidPtrTy, CGM.getPointerAlign()),
- /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
+ TeamsReductions.push_back(ReductionRec);
+ auto *KernelTeamsReductionPtr = CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_reduction_get_fixed_buffer),
+ {}, "_openmp_teams_reductions_buffer_$_$ptr");
llvm::Value *GlobalToBufferCpyFn = ::emitListToGlobalCopyFunction(
- CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
+ CGM, Privates, ReductionArrayTy, Loc, ReductionRec, VarFieldMap);
llvm::Value *GlobalToBufferRedFn = ::emitListToGlobalReduceFunction(
- CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap,
+ CGM, Privates, ReductionArrayTy, Loc, ReductionRec, VarFieldMap,
ReductionFn);
llvm::Value *BufferToGlobalCpyFn = ::emitGlobalToListCopyFunction(
- CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap);
+ CGM, Privates, ReductionArrayTy, Loc, ReductionRec, VarFieldMap);
llvm::Value *BufferToGlobalRedFn = ::emitGlobalToListReduceFunction(
- CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap,
+ CGM, Privates, ReductionArrayTy, Loc, ReductionRec, VarFieldMap,
ReductionFn);
llvm::Value *Args[] = {
RTLoc,
- ThreadId,
- GlobalBufferPtr,
+ KernelTeamsReductionPtr,
CGF.Builder.getInt32(C.getLangOpts().OpenMPCUDAReductionBufNum),
+ ReductionDataSize,
RL,
ShuffleAndReduceFn,
InterWarpCopyFn,
@@ -3055,14 +2961,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
++IRHS;
}
};
- llvm::Value *EndArgs[] = {ThreadId};
RegionCodeGenTy RCG(CodeGen);
- NVPTXActionTy Action(
- nullptr, std::nullopt,
- OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait),
- EndArgs);
- RCG.setAction(Action);
RCG(CGF);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
@@ -3092,7 +2991,7 @@ CGOpenMPRuntimeGPU::translateParameter(const FieldDecl *FD,
if (isa<ImplicitParamDecl>(NativeParam))
return ImplicitParamDecl::Create(
CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(),
- NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other);
+ NativeParam->getIdentifier(), ArgType, ImplicitParamKind::Other);
return ParmVarDecl::Create(
CGM.getContext(),
const_cast<DeclContext *>(NativeParam->getDeclContext()),
@@ -3118,11 +3017,7 @@ CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF,
QualType TargetTy = TargetParam->getType();
llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(LocalAddr, /*Volatile=*/false,
TargetTy, SourceLocation());
- // First cast to generic.
- TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- TargetAddr,
- llvm::PointerType::get(CGF.getLLVMContext(), /*AddrSpace=*/0));
- // Cast from generic to native address space.
+ // Cast to native address space.
TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
TargetAddr,
llvm::PointerType::get(CGF.getLLVMContext(), NativePointeeAddrSpace));
@@ -3149,11 +3044,8 @@ void CGOpenMPRuntimeGPU::emitOutlinedFunctionCall(
TargetArgs.emplace_back(NativeArg);
continue;
}
- llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- NativeArg,
- llvm::PointerType::get(CGF.getLLVMContext(), /*AddrSpace*/ 0));
TargetArgs.emplace_back(
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NativeArg, TargetType));
}
CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs);
}
@@ -3175,10 +3067,10 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false);
ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(),
/*Id=*/nullptr, Int16QTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(),
/*Id=*/nullptr, Int32QTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
WrapperArgs.emplace_back(&ParallelLevelArg);
WrapperArgs.emplace_back(&WrapperArg);
@@ -3291,7 +3183,7 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
const Decl *D) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
+ if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
return;
assert(D && "Expected function or captured|block decl.");
@@ -3343,13 +3235,13 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
Data.insert(std::make_pair(VD, MappedVarData()));
}
if (!NeedToDelayGlobalization) {
- emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
+ emitGenericVarsProlog(CGF, D->getBeginLoc());
struct GlobalizationScope final : EHScopeStack::Cleanup {
GlobalizationScope() = default;
void Emit(CodeGenFunction &CGF, Flags flags) override {
static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime())
- .emitGenericVarsEpilog(CGF, /*WithSPMDCheck=*/true);
+ .emitGenericVarsEpilog(CGF);
}
};
CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup);
@@ -3400,7 +3292,7 @@ Address CGOpenMPRuntimeGPU::getAddressOfLocalVariable(CodeGenFunction &CGF,
VarTy, Align);
}
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
+ if (getDataSharingMode() != CGOpenMPRuntimeGPU::DS_Generic)
return Address::invalid();
VD = VD->getCanonicalDecl();
@@ -3633,6 +3525,8 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::GFX1103:
case CudaArch::GFX1150:
case CudaArch::GFX1151:
+ case CudaArch::GFX1200:
+ case CudaArch::GFX1201:
case CudaArch::Generic:
case CudaArch::UNUSED:
case CudaArch::UNKNOWN:
@@ -3645,42 +3539,6 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
CGOpenMPRuntime::processRequiresDirective(D);
}
-void CGOpenMPRuntimeGPU::clear() {
-
- if (!TeamsReductions.empty()) {
- ASTContext &C = CGM.getContext();
- RecordDecl *StaticRD = C.buildImplicitRecord(
- "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::TTK_Union);
- StaticRD->startDefinition();
- for (const RecordDecl *TeamReductionRec : TeamsReductions) {
- QualType RecTy = C.getRecordType(TeamReductionRec);
- auto *Field = FieldDecl::Create(
- C, StaticRD, SourceLocation(), SourceLocation(), nullptr, RecTy,
- C.getTrivialTypeSourceInfo(RecTy, SourceLocation()),
- /*BW=*/nullptr, /*Mutable=*/false,
- /*InitStyle=*/ICIS_NoInit);
- Field->setAccess(AS_public);
- StaticRD->addDecl(Field);
- }
- StaticRD->completeDefinition();
- QualType StaticTy = C.getRecordType(StaticRD);
- llvm::Type *LLVMReductionsBufferTy =
- CGM.getTypes().ConvertTypeForMem(StaticTy);
- // FIXME: nvlink does not handle weak linkage correctly (object with the
- // different size are reported as erroneous).
- // Restore CommonLinkage as soon as nvlink is fixed.
- auto *GV = new llvm::GlobalVariable(
- CGM.getModule(), LLVMReductionsBufferTy,
- /*isConstant=*/false, llvm::GlobalValue::InternalLinkage,
- llvm::Constant::getNullValue(LLVMReductionsBufferTy),
- "_openmp_teams_reductions_buffer_$_");
- KernelTeamsReductionPtr->setInitializer(
- llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV,
- CGM.VoidPtrTy));
- }
- CGOpenMPRuntime::clear();
-}
-
llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
llvm::Module *M = &CGF.CGM.getModule();
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
index dddfe5a94dcc..141436f26230 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -32,6 +32,18 @@ public:
/// Unknown execution mode (orphaned directive).
EM_Unknown,
};
+
+ /// Target codegen is specialized based on two data-sharing modes: CUDA, in
+ /// which local variables are effectively thread-local globals, and Generic,
+ /// in which local variables are placed in global memory if they may escape
+ /// their declaration context.
+ enum DataSharingMode {
+ /// CUDA data sharing mode.
+ DS_CUDA,
+ /// Generic data-sharing mode.
+ DS_Generic,
+ };
+
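A hedged illustration of what the two modes mean for a local that escapes into a parallel region on the device:

// Sketch only: how the two data-sharing modes treat an escaping local.
void escape_demo() {
  #pragma omp target
  {
    int x = 0; // shared with the parallel region below, so it "escapes"
    #pragma omp parallel shared(x)
    {
      // DS_Generic: x is globalized (moved to device shared/global memory)
      //   so all threads observe a single copy.
      // DS_CUDA (-fopenmp-cuda-mode): x stays an ordinary local; the user
      //   asserts that CUDA-like sharing semantics are acceptable.
    }
  }
}
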
private:
/// Parallel outlined function work for workers to execute.
llvm::SmallVector<llvm::Function *, 16> Work;
@@ -42,23 +54,24 @@ private:
ExecutionMode getExecutionMode() const;
+ DataSharingMode getDataSharingMode() const;
+
/// Get barrier to synchronize all threads in a block.
void syncCTAThreads(CodeGenFunction &CGF);
/// Helper for target directive initialization.
- void emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST,
- bool IsSPMD);
+ void emitKernelInit(const OMPExecutableDirective &D, CodeGenFunction &CGF,
+ EntryFunctionState &EST, bool IsSPMD);
/// Helper for target directive finalization.
void emitKernelDeinit(CodeGenFunction &CGF, EntryFunctionState &EST,
bool IsSPMD);
/// Helper for generic variables globalization prolog.
- void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc,
- bool WithSPMDCheck = false);
+ void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc);
/// Helper for generic variables globalization epilog.
- void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false);
+ void emitGenericVarsEpilog(CodeGenFunction &CGF);
//
// Base class overrides.
@@ -117,7 +130,6 @@ protected:
public:
explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM);
- void clear() override;
bool isGPU() const override { return true; };
@@ -297,17 +309,6 @@ public:
Address getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) override;
- /// Target codegen is specialized based on two data-sharing modes: CUDA, in
- /// which the local variables are actually global threadlocal, and Generic, in
- /// which the local variables are placed in global memory if they may escape
- /// their declaration context.
- enum DataSharingMode {
- /// CUDA data sharing mode.
- CUDA,
- /// Generic data-sharing mode.
- Generic,
- };
-
/// Cleans up references to the objects in finished function.
///
void functionFinished(CodeGenFunction &CGF) override;
@@ -343,6 +344,10 @@ private:
/// to emit optimized code.
ExecutionMode CurrentExecutionMode = EM_Unknown;
+ /// Track the data-sharing mode while generating code for directives within
+ /// a target region.
+ DataSharingMode CurrentDataSharingMode = DataSharingMode::DS_Generic;
+
/// true if currently emitting code for target/teams/distribute region, false
/// - otherwise.
bool IsInTTDRegion = false;
@@ -380,7 +385,6 @@ private:
/// Maps the function to the list of the globalized variables with their
/// addresses.
llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
- llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr;
/// List of the records with the list of fields for the reductions across the
/// teams. Used to build the intermediate buffer for the fast teams
/// reductions.
diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 888b7ddcccd3..cbfa79e10bfe 100644
--- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -658,12 +658,13 @@ void CGRecordLowering::computeVolatileBitfields() {
void CGRecordLowering::accumulateVPtrs() {
if (Layout.hasOwnVFPtr())
- Members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::VFPtr,
- llvm::FunctionType::get(getIntNType(32), /*isVarArg=*/true)->
- getPointerTo()->getPointerTo()));
+ Members.push_back(
+ MemberInfo(CharUnits::Zero(), MemberInfo::VFPtr,
+ llvm::PointerType::getUnqual(Types.getLLVMContext())));
if (Layout.hasOwnVBPtr())
- Members.push_back(MemberInfo(Layout.getVBPtrOffset(), MemberInfo::VBPtr,
- llvm::Type::getInt32PtrTy(Types.getLLVMContext())));
+ Members.push_back(
+ MemberInfo(Layout.getVBPtrOffset(), MemberInfo::VBPtr,
+ llvm::PointerType::getUnqual(Types.getLLVMContext())));
}
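
Both slots now use the same opaque pointer type; a tiny standalone sketch (the function name is invented):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

// With opaque pointers there is no pointee type to spell out: every
// vptr/vbptr slot in the record layout is simply `ptr`.
llvm::PointerType *recordPtrSlot(llvm::LLVMContext &Ctx) {
  return llvm::PointerType::getUnqual(Ctx);
}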
void CGRecordLowering::accumulateVBases() {
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 2184b8600d76..a5cb80640641 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -407,8 +407,10 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
EmitOMPInteropDirective(cast<OMPInteropDirective>(*S));
break;
case Stmt::OMPDispatchDirectiveClass:
- llvm_unreachable("Dispatch directive not supported yet.");
+ CGM.ErrorUnsupported(S, "OpenMP dispatch directive");
break;
+ case Stmt::OMPScopeDirectiveClass:
+ llvm_unreachable("scope not supported with FE outlining");
case Stmt::OMPMaskedDirectiveClass:
EmitOMPMaskedDirective(cast<OMPMaskedDirective>(*S));
break;
@@ -1297,8 +1299,7 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
SLocPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
CGM.getSanitizerMetadata()->disableSanitizerForGlobal(SLocPtr);
assert(ReturnLocation.isValid() && "No valid return location");
- Builder.CreateStore(Builder.CreateBitCast(SLocPtr, Int8PtrTy),
- ReturnLocation);
+ Builder.CreateStore(SLocPtr, ReturnLocation);
}
// Returning from an outlined SEH helper is UB, and we already warn on it.
@@ -2418,6 +2419,24 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
}
}
+static void EmitHipStdParUnsupportedAsm(CodeGenFunction *CGF,
+ const AsmStmt &S) {
+ constexpr auto Name = "__ASM__hipstdpar_unsupported";
+
+ StringRef Asm;
+ if (auto GCCAsm = dyn_cast<GCCAsmStmt>(&S))
+ Asm = GCCAsm->getAsmString()->getString();
+
+ auto &Ctx = CGF->CGM.getLLVMContext();
+
+ auto StrTy = llvm::ConstantDataArray::getString(Ctx, Asm);
+ auto FnTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
+ {StrTy->getType()}, false);
+ auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
+
+ CGF->Builder.CreateCall(UBF, {StrTy});
+}
+
void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
// Pop all cleanup blocks at the end of the asm statement.
CodeGenFunction::RunCleanupsScope Cleanups(*this);
@@ -2429,27 +2448,38 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos;
SmallVector<TargetInfo::ConstraintInfo, 4> InputConstraintInfos;
- for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) {
+ bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
+ bool IsValidTargetAsm = true;
+ for (unsigned i = 0, e = S.getNumOutputs(); i != e && IsValidTargetAsm; i++) {
StringRef Name;
if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
Name = GAS->getOutputName(i);
TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name);
bool IsValid = getTarget().validateOutputConstraint(Info); (void)IsValid;
- assert(IsValid && "Failed to parse output constraint");
+ if (IsHipStdPar && !IsValid)
+ IsValidTargetAsm = false;
+ else
+ assert(IsValid && "Failed to parse output constraint");
OutputConstraintInfos.push_back(Info);
}
- for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) {
+ for (unsigned i = 0, e = S.getNumInputs(); i != e && IsValidTargetAsm; i++) {
StringRef Name;
if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
Name = GAS->getInputName(i);
TargetInfo::ConstraintInfo Info(S.getInputConstraint(i), Name);
bool IsValid =
getTarget().validateInputConstraint(OutputConstraintInfos, Info);
- assert(IsValid && "Failed to parse input constraint"); (void)IsValid;
+ if (IsHipStdPar && !IsValid)
+ IsValidTargetAsm = false;
+ else
+ assert(IsValid && "Failed to parse input constraint");
InputConstraintInfos.push_back(Info);
}
+ if (!IsValidTargetAsm)
+ return EmitHipStdParUnsupportedAsm(this, S);
+
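The net effect: in HIP stdpar device compilation, inline asm whose constraints the GPU target cannot validate no longer trips the asserts; codegen instead plants a call to __ASM__hipstdpar_unsupported carrying the asm text, so the failure surfaces only if that path actually runs on the device. A sketch of offending source, assuming the "=A" (x86 edx:eax) constraint does not validate for the device target:

static inline unsigned long long read_tsc() {
  unsigned long long v;
  // Valid x86 GNU asm; under -hipstdpar device codegen this is assumed to
  // lower to a __ASM__hipstdpar_unsupported("rdtsc") call rather than an
  // assertion failure at compile time.
  asm volatile("rdtsc" : "=A"(v));
  return v;
}
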
std::string Constraints;
std::vector<LValue> ResultRegDests;
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 4910ff6865e4..ed426098ac69 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -485,7 +485,7 @@ static llvm::Function *emitOutlinedFunctionPrologue(
if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
II, ArgType,
- ImplicitParamDecl::ThreadPrivateVar);
+ ImplicitParamKind::ThreadPrivateVar);
} else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
Arg = ParmVarDecl::Create(
Ctx, DebugFunctionDecl,
@@ -494,7 +494,7 @@ static llvm::Function *emitOutlinedFunctionPrologue(
/*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
} else {
Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
- II, ArgType, ImplicitParamDecl::Other);
+ II, ArgType, ImplicitParamKind::Other);
}
Args.emplace_back(Arg);
// Do not cast arguments if we emit function with non-original types.
@@ -667,11 +667,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
I->second.first ? I->second.first->getType() : Arg->getType(),
AlignmentSource::Decl);
if (LV.getType()->isAnyComplexType())
- LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- LV.getAddress(WrapperCGF),
- PI->getType()->getPointerTo(
- LV.getAddress(WrapperCGF).getAddressSpace()),
- PI->getType()));
+ LV.setAddress(LV.getAddress(WrapperCGF).withElementType(PI->getType()));
CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
} else {
auto EI = VLASizes.find(Arg);
@@ -2562,9 +2558,9 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
(void)CGF.EmitOMPLinearClauseInit(S);
{
CodeGenFunction::OMPPrivateScope LoopScope(CGF);
+ CGF.EmitOMPPrivateClause(S, LoopScope);
CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
CGF.EmitOMPLinearClause(S, LoopScope);
- CGF.EmitOMPPrivateClause(S, LoopScope);
CGF.EmitOMPReductionClauseInit(S, LoopScope);
CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
CGF, S, CGF.EmitLValue(S.getIterationVariable()));
@@ -4828,8 +4824,6 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
}
auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
ParamTypes, /*isVarArg=*/false);
- CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CopyFn, CopyFnTy->getPointerTo());
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
for (const auto &Pair : LastprivateDstsOrigs) {
@@ -4991,18 +4985,18 @@ createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
QualType Ty, CapturedDecl *CD,
SourceLocation Loc) {
auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
auto *OrigRef = DeclRefExpr::Create(
C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
/*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
auto *PrivateRef = DeclRefExpr::Create(
C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
/*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
QualType ElemType = C.getBaseElementType(Ty);
auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
auto *InitRef = DeclRefExpr::Create(
C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
/*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
@@ -5062,7 +5056,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
- getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
+ getContext().VoidPtrTy, ArrSize, nullptr, ArraySizeModifier::Normal,
/*IndexTypeQuals=*/0);
BPVD = createImplicitFirstprivateForType(
getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
@@ -5070,7 +5064,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
QualType SizesType = getContext().getConstantArrayType(
getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
- ArrSize, nullptr, ArrayType::Normal,
+ ArrSize, nullptr, ArraySizeModifier::Normal,
/*IndexTypeQuals=*/0);
SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
S.getBeginLoc());
@@ -5115,8 +5109,6 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
}
auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
ParamTypes, /*isVarArg=*/false);
- CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CopyFn, CopyFnTy->getPointerTo());
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
for (const auto &Pair : PrivatePtrs) {
@@ -5143,6 +5135,15 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
Action.Enter(CGF);
OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
+ auto *TL = S.getSingleClause<OMPThreadLimitClause>();
+ if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
+ needsTaskBasedThreadLimit(S.getDirectiveKind()) && TL) {
+ // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
+ // enclosing this target region. This will indirectly set the thread_limit
+ // for every applicable construct within target region.
+ CGF.CGM.getOpenMPRuntime().emitThreadLimitClause(
+ CGF, TL->getThreadLimit(), S.getBeginLoc());
+ }
BodyGen(CGF);
};
llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
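
A minimal source example of the clause being forwarded here, assuming OpenMP 5.1 semantics (the nowait clause is what makes the target region take this task-based path):

void scale(int *a, int n) {
  // thread_limit is forwarded via __kmpc_set_thread_limit on the task
  // enclosing the target region, capping every parallel region inside it.
  #pragma omp target nowait thread_limit(64) map(tofrom: a[0:n])
  {
    #pragma omp parallel for // runs with at most 64 threads
    for (int i = 0; i < n; ++i)
      a[i] *= 2;
  }
  #pragma omp taskwait
}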
@@ -6205,7 +6206,7 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
X.getAddress(CGF).getElementType());
}
llvm::Value *Res =
- CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
+ CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(CGF), UpdateVal, AO);
return std::make_pair(true, RValue::get(Res));
}
@@ -6507,6 +6508,10 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
IsPostfixUpdate, IsFailOnly, Loc);
break;
}
+ case OMPC_fail: {
+ // TODO
+ break;
+ }
default:
llvm_unreachable("Clause is not allowed in 'omp atomic'.");
}
@@ -8055,7 +8060,8 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
D.getDirectiveKind() == OMPD_critical ||
D.getDirectiveKind() == OMPD_section ||
D.getDirectiveKind() == OMPD_master ||
- D.getDirectiveKind() == OMPD_masked) {
+ D.getDirectiveKind() == OMPD_masked ||
+ D.getDirectiveKind() == OMPD_unroll) {
EmitStmt(D.getAssociatedStmt());
} else {
auto LPCRegion =
diff --git a/clang/lib/CodeGen/CGVTT.cpp b/clang/lib/CodeGen/CGVTT.cpp
index 22790147c6f5..1d3f14f1c534 100644
--- a/clang/lib/CodeGen/CGVTT.cpp
+++ b/clang/lib/CodeGen/CGVTT.cpp
@@ -93,6 +93,11 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT,
if (CGM.supportsCOMDAT() && VTT->isWeakForLinker())
VTT->setComdat(CGM.getModule().getOrInsertComdat(VTT->getName()));
+
+ // Set the visibility. This will already have been set on the VTT declaration.
+ // Set it again, now that we have a definition, as the implicit visibility can
+ // apply differently to definitions.
+ CGM.setGVProperties(VTT, RD);
}
llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) {
diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp
index 91dd7a8e046b..27a2cab4f753 100644
--- a/clang/lib/CodeGen/CGVTables.cpp
+++ b/clang/lib/CodeGen/CGVTables.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include <algorithm>
#include <cstdio>
+#include <utility>
using namespace clang;
using namespace CodeGen;
@@ -201,7 +202,7 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn,
// Find the first store of "this", which will be to the alloca associated
// with "this".
Address ThisPtr =
- Address(&*AI, ConvertTypeForMem(MD->getThisType()->getPointeeType()),
+ Address(&*AI, ConvertTypeForMem(MD->getFunctionObjectParameterType()),
CGM.getClassPointerAlignment(MD->getParent()));
llvm::BasicBlock *EntryBB = &Fn->front();
llvm::BasicBlock::iterator ThisStore =
@@ -464,10 +465,6 @@ void CodeGenFunction::generateThunk(llvm::Function *Fn,
llvm::Constant *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true);
- // Fix up the function type for an unprototyped musttail call.
- if (IsUnprototyped)
- Callee = llvm::ConstantExpr::getBitCast(Callee, Fn->getType());
-
// Make the call and return the result.
EmitCallAndReturnForThunk(llvm::FunctionCallee(Fn->getFunctionType(), Callee),
&Thunk, IsUnprototyped);
@@ -536,11 +533,8 @@ llvm::Constant *CodeGenVTables::maybeEmitThunk(GlobalDecl GD,
Name.str(), &CGM.getModule());
CGM.SetLLVMFunctionAttributes(MD, FnInfo, ThunkFn, /*IsThunk=*/false);
- // If needed, replace the old thunk with a bitcast.
if (!OldThunkFn->use_empty()) {
- llvm::Constant *NewPtrForOldDecl =
- llvm::ConstantExpr::getBitCast(ThunkFn, OldThunkFn->getType());
- OldThunkFn->replaceAllUsesWith(NewPtrForOldDecl);
+ OldThunkFn->replaceAllUsesWith(ThunkFn);
}
// Remove the old thunk.
@@ -639,8 +633,16 @@ void CodeGenVTables::addRelativeComponent(ConstantArrayBuilder &builder,
// want the stub/proxy to be emitted for properly calculating the offset.
// Examples where there would be no symbol emitted are available_externally
// and private linkages.
- auto stubLinkage = vtableHasLocalLinkage ? llvm::GlobalValue::InternalLinkage
- : llvm::GlobalValue::ExternalLinkage;
+ //
+ // `internal` linkage results in an STB_LOCAL ELF binding while still
+ // manifesting a local symbol.
+ //
+ // `linkonce_odr` linkage results in an STB_DEFAULT ELF binding but also allows for
+ // the rtti_proxy to be transparently replaced with a GOTPCREL reloc by a
+ // target that supports this replacement.
+ auto stubLinkage = vtableHasLocalLinkage
+ ? llvm::GlobalValue::InternalLinkage
+ : llvm::GlobalValue::LinkOnceODRLinkage;
llvm::Constant *target;
if (auto *func = dyn_cast<llvm::Function>(globalVal)) {
@@ -1303,48 +1305,42 @@ llvm::GlobalObject::VCallVisibility CodeGenModule::GetVCallVisibilityLevel(
void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
llvm::GlobalVariable *VTable,
const VTableLayout &VTLayout) {
- if (!getCodeGenOpts().LTOUnit)
+ // Emit type metadata on vtables with LTO or IR instrumentation.
+ // In IR instrumentation, the type metadata is used to identify vtable
+ // definitions (for type profiling) among all global variables.
+ if (!getCodeGenOpts().LTOUnit && !getCodeGenOpts().hasProfileIRInstr())
return;
CharUnits ComponentWidth = GetTargetTypeStoreSize(getVTableComponentType());
- typedef std::pair<const CXXRecordDecl *, unsigned> AddressPoint;
+ struct AddressPoint {
+ const CXXRecordDecl *Base;
+ size_t Offset;
+ std::string TypeName;
+ bool operator<(const AddressPoint &RHS) const {
+ int D = TypeName.compare(RHS.TypeName);
+ return D < 0 || (D == 0 && Offset < RHS.Offset);
+ }
+ };
std::vector<AddressPoint> AddressPoints;
- for (auto &&AP : VTLayout.getAddressPoints())
- AddressPoints.push_back(std::make_pair(
- AP.first.getBase(), VTLayout.getVTableOffset(AP.second.VTableIndex) +
- AP.second.AddressPointIndex));
+ for (auto &&AP : VTLayout.getAddressPoints()) {
+ AddressPoint N{AP.first.getBase(),
+ VTLayout.getVTableOffset(AP.second.VTableIndex) +
+ AP.second.AddressPointIndex,
+ {}};
+ llvm::raw_string_ostream Stream(N.TypeName);
+ getCXXABI().getMangleContext().mangleCanonicalTypeName(
+ QualType(N.Base->getTypeForDecl(), 0), Stream);
+ AddressPoints.push_back(std::move(N));
+ }
// Sort the address points for determinism.
- llvm::sort(AddressPoints, [this](const AddressPoint &AP1,
- const AddressPoint &AP2) {
- if (&AP1 == &AP2)
- return false;
-
- std::string S1;
- llvm::raw_string_ostream O1(S1);
- getCXXABI().getMangleContext().mangleTypeName(
- QualType(AP1.first->getTypeForDecl(), 0), O1);
- O1.flush();
-
- std::string S2;
- llvm::raw_string_ostream O2(S2);
- getCXXABI().getMangleContext().mangleTypeName(
- QualType(AP2.first->getTypeForDecl(), 0), O2);
- O2.flush();
-
- if (S1 < S2)
- return true;
- if (S1 != S2)
- return false;
-
- return AP1.second < AP2.second;
- });
+ llvm::sort(AddressPoints);
ArrayRef<VTableComponent> Comps = VTLayout.vtable_components();
for (auto AP : AddressPoints) {
// Create type metadata for the address point.
- AddVTableTypeMetadata(VTable, ComponentWidth * AP.second, AP.first);
+ AddVTableTypeMetadata(VTable, ComponentWidth * AP.Offset, AP.Base);
// The class associated with each address point could also potentially be
// used for indirect calls via a member function pointer, so we need to
@@ -1356,7 +1352,7 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD,
llvm::Metadata *MD = CreateMetadataIdentifierForVirtualMemPtrType(
Context.getMemberPointerType(
Comps[I].getFunctionDecl()->getType(),
- Context.getRecordType(AP.first).getTypePtr()));
+ Context.getRecordType(AP.Base).getTypePtr()));
VTable->addTypeMetadata((ComponentWidth * I).getQuantity(), MD);
}
}
diff --git a/clang/lib/CodeGen/CodeGenABITypes.cpp b/clang/lib/CodeGen/CodeGenABITypes.cpp
index d3a16a1d5acc..a6073e1188d6 100644
--- a/clang/lib/CodeGen/CodeGenABITypes.cpp
+++ b/clang/lib/CodeGen/CodeGenABITypes.cpp
@@ -65,9 +65,8 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM,
ArrayRef<CanQualType> argTypes,
FunctionType::ExtInfo info,
RequiredArgs args) {
- return CGM.getTypes().arrangeLLVMFunctionInfo(
- returnType, /*instanceMethod=*/false, /*chainCall=*/false, argTypes,
- info, {}, args);
+ return CGM.getTypes().arrangeLLVMFunctionInfo(returnType, FnInfoOpts::None,
+ argTypes, info, {}, args);
}
ImplicitCXXConstructorArgs
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index a3b72381d73f..bb6b1a3bc228 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "clang/CodeGen/CodeGenAction.h"
+#include "BackendConsumer.h"
#include "CGCall.h"
#include "CodeGenModule.h"
#include "CoverageMappingGen.h"
@@ -48,428 +49,374 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/Utils/Cloning.h"
-#include <memory>
#include <optional>
using namespace clang;
using namespace llvm;
#define DEBUG_TYPE "codegenaction"
-namespace clang {
- class BackendConsumer;
- class ClangDiagnosticHandler final : public DiagnosticHandler {
- public:
- ClangDiagnosticHandler(const CodeGenOptions &CGOpts, BackendConsumer *BCon)
- : CodeGenOpts(CGOpts), BackendCon(BCon) {}
-
- bool handleDiagnostics(const DiagnosticInfo &DI) override;
-
- bool isAnalysisRemarkEnabled(StringRef PassName) const override {
- return CodeGenOpts.OptimizationRemarkAnalysis.patternMatches(PassName);
- }
- bool isMissedOptRemarkEnabled(StringRef PassName) const override {
- return CodeGenOpts.OptimizationRemarkMissed.patternMatches(PassName);
- }
- bool isPassedOptRemarkEnabled(StringRef PassName) const override {
- return CodeGenOpts.OptimizationRemark.patternMatches(PassName);
- }
-
- bool isAnyRemarkEnabled() const override {
- return CodeGenOpts.OptimizationRemarkAnalysis.hasValidPattern() ||
- CodeGenOpts.OptimizationRemarkMissed.hasValidPattern() ||
- CodeGenOpts.OptimizationRemark.hasValidPattern();
- }
+namespace llvm {
+extern cl::opt<bool> ClRelinkBuiltinBitcodePostop;
+}
- private:
- const CodeGenOptions &CodeGenOpts;
- BackendConsumer *BackendCon;
- };
+namespace clang {
+class BackendConsumer;
+class ClangDiagnosticHandler final : public DiagnosticHandler {
+public:
+ ClangDiagnosticHandler(const CodeGenOptions &CGOpts, BackendConsumer *BCon)
+ : CodeGenOpts(CGOpts), BackendCon(BCon) {}
- static void reportOptRecordError(Error E, DiagnosticsEngine &Diags,
- const CodeGenOptions &CodeGenOpts) {
- handleAllErrors(
- std::move(E),
- [&](const LLVMRemarkSetupFileError &E) {
- Diags.Report(diag::err_cannot_open_file)
- << CodeGenOpts.OptRecordFile << E.message();
- },
- [&](const LLVMRemarkSetupPatternError &E) {
- Diags.Report(diag::err_drv_optimization_remark_pattern)
- << E.message() << CodeGenOpts.OptRecordPasses;
- },
- [&](const LLVMRemarkSetupFormatError &E) {
- Diags.Report(diag::err_drv_optimization_remark_format)
- << CodeGenOpts.OptRecordFormat;
- });
- }
+ bool handleDiagnostics(const DiagnosticInfo &DI) override;
- class BackendConsumer : public ASTConsumer {
- using LinkModule = CodeGenAction::LinkModule;
+ bool isAnalysisRemarkEnabled(StringRef PassName) const override {
+ return CodeGenOpts.OptimizationRemarkAnalysis.patternMatches(PassName);
+ }
+ bool isMissedOptRemarkEnabled(StringRef PassName) const override {
+ return CodeGenOpts.OptimizationRemarkMissed.patternMatches(PassName);
+ }
+ bool isPassedOptRemarkEnabled(StringRef PassName) const override {
+ return CodeGenOpts.OptimizationRemark.patternMatches(PassName);
+ }
- virtual void anchor();
- DiagnosticsEngine &Diags;
- BackendAction Action;
- const HeaderSearchOptions &HeaderSearchOpts;
- const CodeGenOptions &CodeGenOpts;
- const TargetOptions &TargetOpts;
- const LangOptions &LangOpts;
- std::unique_ptr<raw_pwrite_stream> AsmOutStream;
- ASTContext *Context;
- IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;
+ bool isAnyRemarkEnabled() const override {
+ return CodeGenOpts.OptimizationRemarkAnalysis.hasValidPattern() ||
+ CodeGenOpts.OptimizationRemarkMissed.hasValidPattern() ||
+ CodeGenOpts.OptimizationRemark.hasValidPattern();
+ }
- Timer LLVMIRGeneration;
- unsigned LLVMIRGenerationRefCount;
+private:
+ const CodeGenOptions &CodeGenOpts;
+ BackendConsumer *BackendCon;
+};
- /// True if we've finished generating IR. This prevents us from generating
- /// additional LLVM IR after emitting output in HandleTranslationUnit. This
- /// can happen when Clang plugins trigger additional AST deserialization.
- bool IRGenFinished = false;
+static void reportOptRecordError(Error E, DiagnosticsEngine &Diags,
+ const CodeGenOptions &CodeGenOpts) {
+ handleAllErrors(
+ std::move(E),
+ [&](const LLVMRemarkSetupFileError &E) {
+ Diags.Report(diag::err_cannot_open_file)
+ << CodeGenOpts.OptRecordFile << E.message();
+ },
+ [&](const LLVMRemarkSetupPatternError &E) {
+ Diags.Report(diag::err_drv_optimization_remark_pattern)
+ << E.message() << CodeGenOpts.OptRecordPasses;
+ },
+ [&](const LLVMRemarkSetupFormatError &E) {
+ Diags.Report(diag::err_drv_optimization_remark_format)
+ << CodeGenOpts.OptRecordFormat;
+ });
+}
- bool TimerIsEnabled = false;
+BackendConsumer::BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
+ const HeaderSearchOptions &HeaderSearchOpts,
+ const PreprocessorOptions &PPOpts,
+ const CodeGenOptions &CodeGenOpts,
+ const TargetOptions &TargetOpts,
+ const LangOptions &LangOpts,
+ const std::string &InFile,
+ SmallVector<LinkModule, 4> LinkModules,
+ std::unique_ptr<raw_pwrite_stream> OS,
+ LLVMContext &C,
+ CoverageSourceInfo *CoverageInfo)
+ : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
+ CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
+ AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS),
+ LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
+ LLVMIRGenerationRefCount(0),
+ Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts,
+ PPOpts, CodeGenOpts, C, CoverageInfo)),
+ LinkModules(std::move(LinkModules)) {
+ TimerIsEnabled = CodeGenOpts.TimePasses;
+ llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
+ llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
+}
- std::unique_ptr<CodeGenerator> Gen;
+// This constructor is used in installing an empty BackendConsumer
+// to use the clang diagnostic handler for IR input files. It avoids
+// initializing the OS field.
+BackendConsumer::BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
+ const HeaderSearchOptions &HeaderSearchOpts,
+ const PreprocessorOptions &PPOpts,
+ const CodeGenOptions &CodeGenOpts,
+ const TargetOptions &TargetOpts,
+ const LangOptions &LangOpts,
+ llvm::Module *Module,
+ SmallVector<LinkModule, 4> LinkModules,
+ LLVMContext &C,
+ CoverageSourceInfo *CoverageInfo)
+ : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
+ CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
+ Context(nullptr), FS(VFS),
+ LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
+ LLVMIRGenerationRefCount(0),
+ Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts,
+ PPOpts, CodeGenOpts, C, CoverageInfo)),
+ LinkModules(std::move(LinkModules)), CurLinkModule(Module) {
+ TimerIsEnabled = CodeGenOpts.TimePasses;
+ llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
+ llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
+}
- SmallVector<LinkModule, 4> LinkModules;
+llvm::Module* BackendConsumer::getModule() const {
+ return Gen->GetModule();
+}
- // A map from mangled names to their function's source location, used for
- // backend diagnostics as the Clang AST may be unavailable. We actually use
- // the mangled name's hash as the key because mangled names can be very
- // long and take up lots of space. Using a hash can cause name collision,
- // but that is rare and the consequences are pointing to a wrong source
- // location which is not severe. This is a vector instead of an actual map
- // because we optimize for time building this map rather than time
- // retrieving an entry, as backend diagnostics are uncommon.
- std::vector<std::pair<llvm::hash_code, FullSourceLoc>>
- ManglingFullSourceLocs;
+std::unique_ptr<llvm::Module> BackendConsumer::takeModule() {
+ return std::unique_ptr<llvm::Module>(Gen->ReleaseModule());
+}
- // This is here so that the diagnostic printer knows the module a diagnostic
- // refers to.
- llvm::Module *CurLinkModule = nullptr;
+CodeGenerator* BackendConsumer::getCodeGenerator() {
+ return Gen.get();
+}
- public:
- BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
- IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
- const HeaderSearchOptions &HeaderSearchOpts,
- const PreprocessorOptions &PPOpts,
- const CodeGenOptions &CodeGenOpts,
- const TargetOptions &TargetOpts,
- const LangOptions &LangOpts, const std::string &InFile,
- SmallVector<LinkModule, 4> LinkModules,
- std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
- CoverageSourceInfo *CoverageInfo = nullptr)
- : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
- CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
- AsmOutStream(std::move(OS)), Context(nullptr), FS(VFS),
- LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
- LLVMIRGenerationRefCount(0),
- Gen(CreateLLVMCodeGen(Diags, InFile, std::move(VFS), HeaderSearchOpts,
- PPOpts, CodeGenOpts, C, CoverageInfo)),
- LinkModules(std::move(LinkModules)) {
- TimerIsEnabled = CodeGenOpts.TimePasses;
- llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
- llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
- }
+void BackendConsumer::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) {
+ Gen->HandleCXXStaticMemberVarInstantiation(VD);
+}
- // This constructor is used in installing an empty BackendConsumer
- // to use the clang diagnostic handler for IR input files. It avoids
- // initializing the OS field.
- BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
- IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
- const HeaderSearchOptions &HeaderSearchOpts,
- const PreprocessorOptions &PPOpts,
- const CodeGenOptions &CodeGenOpts,
- const TargetOptions &TargetOpts,
- const LangOptions &LangOpts, llvm::Module *Module,
- SmallVector<LinkModule, 4> LinkModules, LLVMContext &C,
- CoverageSourceInfo *CoverageInfo = nullptr)
- : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
- CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
- Context(nullptr), FS(VFS),
- LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
- LLVMIRGenerationRefCount(0),
- Gen(CreateLLVMCodeGen(Diags, "", std::move(VFS), HeaderSearchOpts,
- PPOpts, CodeGenOpts, C, CoverageInfo)),
- LinkModules(std::move(LinkModules)), CurLinkModule(Module) {
- TimerIsEnabled = CodeGenOpts.TimePasses;
- llvm::TimePassesIsEnabled = CodeGenOpts.TimePasses;
- llvm::TimePassesPerRun = CodeGenOpts.TimePassesPerRun;
- }
- llvm::Module *getModule() const { return Gen->GetModule(); }
- std::unique_ptr<llvm::Module> takeModule() {
- return std::unique_ptr<llvm::Module>(Gen->ReleaseModule());
- }
+void BackendConsumer::Initialize(ASTContext &Ctx) {
+ assert(!Context && "initialized multiple times");
- CodeGenerator *getCodeGenerator() { return Gen.get(); }
+ Context = &Ctx;
- void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override {
- Gen->HandleCXXStaticMemberVarInstantiation(VD);
- }
+ if (TimerIsEnabled)
+ LLVMIRGeneration.startTimer();
- void Initialize(ASTContext &Ctx) override {
- assert(!Context && "initialized multiple times");
+ Gen->Initialize(Ctx);
- Context = &Ctx;
+ if (TimerIsEnabled)
+ LLVMIRGeneration.stopTimer();
+}
- if (TimerIsEnabled)
- LLVMIRGeneration.startTimer();
+bool BackendConsumer::HandleTopLevelDecl(DeclGroupRef D) {
+ PrettyStackTraceDecl CrashInfo(*D.begin(), SourceLocation(),
+ Context->getSourceManager(),
+ "LLVM IR generation of declaration");
- Gen->Initialize(Ctx);
+ // Recurse.
+ if (TimerIsEnabled) {
+ LLVMIRGenerationRefCount += 1;
+ if (LLVMIRGenerationRefCount == 1)
+ LLVMIRGeneration.startTimer();
+ }
- if (TimerIsEnabled)
- LLVMIRGeneration.stopTimer();
- }
+ Gen->HandleTopLevelDecl(D);
- bool HandleTopLevelDecl(DeclGroupRef D) override {
- PrettyStackTraceDecl CrashInfo(*D.begin(), SourceLocation(),
- Context->getSourceManager(),
- "LLVM IR generation of declaration");
+ if (TimerIsEnabled) {
+ LLVMIRGenerationRefCount -= 1;
+ if (LLVMIRGenerationRefCount == 0)
+ LLVMIRGeneration.stopTimer();
+ }
- // Recurse.
- if (TimerIsEnabled) {
- LLVMIRGenerationRefCount += 1;
- if (LLVMIRGenerationRefCount == 1)
- LLVMIRGeneration.startTimer();
- }
+ return true;
+}
- Gen->HandleTopLevelDecl(D);
+void BackendConsumer::HandleInlineFunctionDefinition(FunctionDecl *D) {
+ PrettyStackTraceDecl CrashInfo(D, SourceLocation(),
+ Context->getSourceManager(),
+ "LLVM IR generation of inline function");
+ if (TimerIsEnabled)
+ LLVMIRGeneration.startTimer();
- if (TimerIsEnabled) {
- LLVMIRGenerationRefCount -= 1;
- if (LLVMIRGenerationRefCount == 0)
- LLVMIRGeneration.stopTimer();
- }
+ Gen->HandleInlineFunctionDefinition(D);
- return true;
- }
+ if (TimerIsEnabled)
+ LLVMIRGeneration.stopTimer();
+}
- void HandleInlineFunctionDefinition(FunctionDecl *D) override {
- PrettyStackTraceDecl CrashInfo(D, SourceLocation(),
- Context->getSourceManager(),
- "LLVM IR generation of inline function");
- if (TimerIsEnabled)
- LLVMIRGeneration.startTimer();
+void BackendConsumer::HandleInterestingDecl(DeclGroupRef D) {
+ // Ignore interesting decls from the AST reader after IRGen is finished.
+ if (!IRGenFinished)
+ HandleTopLevelDecl(D);
+}
- Gen->HandleInlineFunctionDefinition(D);
+// Links each entry in LinkModules into our module. Returns true on error.
+bool BackendConsumer::LinkInModules(llvm::Module *M, bool ShouldLinkFiles) {
- if (TimerIsEnabled)
- LLVMIRGeneration.stopTimer();
- }
+ for (auto &LM : LinkModules) {
+ assert(LM.Module && "LinkModule does not actually have a module");
- void HandleInterestingDecl(DeclGroupRef D) override {
- // Ignore interesting decls from the AST reader after IRGen is finished.
- if (!IRGenFinished)
- HandleTopLevelDecl(D);
- }
+    // If ShouldLinkFiles is not set, skip modules added via
+    // -mlink-bitcode-files; only link those from -mlink-builtin-bitcode.
+ if (!LM.Internalize && !ShouldLinkFiles)
+ continue;
- // Links each entry in LinkModules into our module. Returns true on error.
- bool LinkInModules(llvm::Module *M) {
- for (auto &LM : LinkModules) {
- assert(LM.Module && "LinkModule does not actually have a module");
- if (LM.PropagateAttrs)
- for (Function &F : *LM.Module) {
- // Skip intrinsics. Keep consistent with how intrinsics are created
- // in LLVM IR.
- if (F.isIntrinsic())
- continue;
- CodeGen::mergeDefaultFunctionDefinitionAttributes(
- F, CodeGenOpts, LangOpts, TargetOpts, LM.Internalize);
- }
+ if (LM.PropagateAttrs)
+ for (Function &F : *LM.Module) {
+ // Skip intrinsics. Keep consistent with how intrinsics are created
+ // in LLVM IR.
+ if (F.isIntrinsic())
+ continue;
+ CodeGen::mergeDefaultFunctionDefinitionAttributes(
+ F, CodeGenOpts, LangOpts, TargetOpts, LM.Internalize);
+ }
- CurLinkModule = LM.Module.get();
+ CurLinkModule = LM.Module.get();
+ bool Err;
- bool Err;
- if (LM.Internalize) {
- Err = Linker::linkModules(
- *M, std::move(LM.Module), LM.LinkFlags,
- [](llvm::Module &M, const llvm::StringSet<> &GVS) {
- internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) {
- return !GV.hasName() || (GVS.count(GV.getName()) == 0);
- });
+ auto DoLink = [&](auto &Mod) {
+ if (LM.Internalize) {
+ Err = Linker::linkModules(
+ *M, std::move(Mod), LM.LinkFlags,
+ [](llvm::Module &M, const llvm::StringSet<> &GVS) {
+ internalizeModule(M, [&GVS](const llvm::GlobalValue &GV) {
+ return !GV.hasName() || (GVS.count(GV.getName()) == 0);
});
- } else {
- Err = Linker::linkModules(*M, std::move(LM.Module), LM.LinkFlags);
- }
-
- if (Err)
- return true;
- }
- LinkModules.clear();
- return false; // success
- }
+ });
+ } else
+ Err = Linker::linkModules(*M, std::move(Mod), LM.LinkFlags);
+ };
- void HandleTranslationUnit(ASTContext &C) override {
- {
- llvm::TimeTraceScope TimeScope("Frontend");
- PrettyStackTraceString CrashInfo("Per-file LLVM IR generation");
- if (TimerIsEnabled) {
- LLVMIRGenerationRefCount += 1;
- if (LLVMIRGenerationRefCount == 1)
- LLVMIRGeneration.startTimer();
- }
+    // Create a clone to hand to the linker, preserving the original link
+    // modules so they can be linked again in the future.
+ if (ClRelinkBuiltinBitcodePostop) {
+ // TODO: If CloneModule() is updated to support cloning of unmaterialized
+ // modules, we can remove this
+ if (Error E = CurLinkModule->materializeAll())
+ return false;
- Gen->HandleTranslationUnit(C);
+ std::unique_ptr<llvm::Module> Clone = llvm::CloneModule(*LM.Module);
- if (TimerIsEnabled) {
- LLVMIRGenerationRefCount -= 1;
- if (LLVMIRGenerationRefCount == 0)
- LLVMIRGeneration.stopTimer();
- }
-
- IRGenFinished = true;
- }
+ DoLink(Clone);
+ }
+ // Otherwise we can link (and clean up) the original modules
+ else {
+ DoLink(LM.Module);
+ }
+ }
- // Silently ignore if we weren't initialized for some reason.
- if (!getModule())
- return;
+ return false; // success
+}
- LLVMContext &Ctx = getModule()->getContext();
- std::unique_ptr<DiagnosticHandler> OldDiagnosticHandler =
- Ctx.getDiagnosticHandler();
- Ctx.setDiagnosticHandler(std::make_unique<ClangDiagnosticHandler>(
- CodeGenOpts, this));
+void BackendConsumer::HandleTranslationUnit(ASTContext &C) {
+ {
+ llvm::TimeTraceScope TimeScope("Frontend");
+ PrettyStackTraceString CrashInfo("Per-file LLVM IR generation");
+ if (TimerIsEnabled) {
+ LLVMIRGenerationRefCount += 1;
+ if (LLVMIRGenerationRefCount == 1)
+ LLVMIRGeneration.startTimer();
+ }
- Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr =
- setupLLVMOptimizationRemarks(
- Ctx, CodeGenOpts.OptRecordFile, CodeGenOpts.OptRecordPasses,
- CodeGenOpts.OptRecordFormat, CodeGenOpts.DiagnosticsWithHotness,
- CodeGenOpts.DiagnosticsHotnessThreshold);
+ Gen->HandleTranslationUnit(C);
- if (Error E = OptRecordFileOrErr.takeError()) {
- reportOptRecordError(std::move(E), Diags, CodeGenOpts);
- return;
- }
+ if (TimerIsEnabled) {
+ LLVMIRGenerationRefCount -= 1;
+ if (LLVMIRGenerationRefCount == 0)
+ LLVMIRGeneration.stopTimer();
+ }
- std::unique_ptr<llvm::ToolOutputFile> OptRecordFile =
- std::move(*OptRecordFileOrErr);
+ IRGenFinished = true;
+ }
- if (OptRecordFile &&
- CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone)
- Ctx.setDiagnosticsHotnessRequested(true);
+ // Silently ignore if we weren't initialized for some reason.
+ if (!getModule())
+ return;
- if (CodeGenOpts.MisExpect) {
- Ctx.setMisExpectWarningRequested(true);
- }
+ LLVMContext &Ctx = getModule()->getContext();
+ std::unique_ptr<DiagnosticHandler> OldDiagnosticHandler =
+ Ctx.getDiagnosticHandler();
+ Ctx.setDiagnosticHandler(std::make_unique<ClangDiagnosticHandler>(
+ CodeGenOpts, this));
- if (CodeGenOpts.DiagnosticsMisExpectTolerance) {
- Ctx.setDiagnosticsMisExpectTolerance(
- CodeGenOpts.DiagnosticsMisExpectTolerance);
- }
+ Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr =
+ setupLLVMOptimizationRemarks(
+ Ctx, CodeGenOpts.OptRecordFile, CodeGenOpts.OptRecordPasses,
+ CodeGenOpts.OptRecordFormat, CodeGenOpts.DiagnosticsWithHotness,
+ CodeGenOpts.DiagnosticsHotnessThreshold);
- // Link each LinkModule into our module.
- if (LinkInModules(getModule()))
- return;
+ if (Error E = OptRecordFileOrErr.takeError()) {
+ reportOptRecordError(std::move(E), Diags, CodeGenOpts);
+ return;
+ }
- for (auto &F : getModule()->functions()) {
- if (const Decl *FD = Gen->GetDeclForMangledName(F.getName())) {
- auto Loc = FD->getASTContext().getFullLoc(FD->getLocation());
- // TODO: use a fast content hash when available.
- auto NameHash = llvm::hash_value(F.getName());
- ManglingFullSourceLocs.push_back(std::make_pair(NameHash, Loc));
- }
- }
+ std::unique_ptr<llvm::ToolOutputFile> OptRecordFile =
+ std::move(*OptRecordFileOrErr);
- if (CodeGenOpts.ClearASTBeforeBackend) {
- LLVM_DEBUG(llvm::dbgs() << "Clearing AST...\n");
- // Access to the AST is no longer available after this.
- // Other things that the ASTContext manages are still available, e.g.
- // the SourceManager. It'd be nice if we could separate out all the
- // things in ASTContext used after this point and null out the
- // ASTContext, but too many various parts of the ASTContext are still
- // used in various parts.
- C.cleanup();
- C.getAllocator().Reset();
- }
+ if (OptRecordFile &&
+ CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone)
+ Ctx.setDiagnosticsHotnessRequested(true);
- EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef());
+ if (CodeGenOpts.MisExpect) {
+ Ctx.setMisExpectWarningRequested(true);
+ }
- EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts,
- LangOpts, C.getTargetInfo().getDataLayoutString(),
- getModule(), Action, FS, std::move(AsmOutStream));
+ if (CodeGenOpts.DiagnosticsMisExpectTolerance) {
+ Ctx.setDiagnosticsMisExpectTolerance(
+ CodeGenOpts.DiagnosticsMisExpectTolerance);
+ }
- Ctx.setDiagnosticHandler(std::move(OldDiagnosticHandler));
+ // Link each LinkModule into our module.
+ if (LinkInModules(getModule()))
+ return;
- if (OptRecordFile)
- OptRecordFile->keep();
+ for (auto &F : getModule()->functions()) {
+ if (const Decl *FD = Gen->GetDeclForMangledName(F.getName())) {
+ auto Loc = FD->getASTContext().getFullLoc(FD->getLocation());
+ // TODO: use a fast content hash when available.
+ auto NameHash = llvm::hash_value(F.getName());
+ ManglingFullSourceLocs.push_back(std::make_pair(NameHash, Loc));
}
+ }
- void HandleTagDeclDefinition(TagDecl *D) override {
- PrettyStackTraceDecl CrashInfo(D, SourceLocation(),
- Context->getSourceManager(),
- "LLVM IR generation of declaration");
- Gen->HandleTagDeclDefinition(D);
- }
+ if (CodeGenOpts.ClearASTBeforeBackend) {
+ LLVM_DEBUG(llvm::dbgs() << "Clearing AST...\n");
+ // Access to the AST is no longer available after this.
+ // Other things that the ASTContext manages are still available, e.g.
+ // the SourceManager. It'd be nice if we could separate out all the
+ // things in ASTContext used after this point and null out the
+ // ASTContext, but too many various parts of the ASTContext are still
+ // used in various parts.
+ C.cleanup();
+ C.getAllocator().Reset();
+ }
- void HandleTagDeclRequiredDefinition(const TagDecl *D) override {
- Gen->HandleTagDeclRequiredDefinition(D);
- }
+ EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef());
- void CompleteTentativeDefinition(VarDecl *D) override {
- Gen->CompleteTentativeDefinition(D);
- }
+ EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts,
+ C.getTargetInfo().getDataLayoutString(), getModule(),
+ Action, FS, std::move(AsmOutStream), this);
- void CompleteExternalDeclaration(VarDecl *D) override {
- Gen->CompleteExternalDeclaration(D);
- }
+ Ctx.setDiagnosticHandler(std::move(OldDiagnosticHandler));
- void AssignInheritanceModel(CXXRecordDecl *RD) override {
- Gen->AssignInheritanceModel(RD);
- }
+ if (OptRecordFile)
+ OptRecordFile->keep();
+}
- void HandleVTable(CXXRecordDecl *RD) override {
- Gen->HandleVTable(RD);
- }
+void BackendConsumer::HandleTagDeclDefinition(TagDecl *D) {
+ PrettyStackTraceDecl CrashInfo(D, SourceLocation(),
+ Context->getSourceManager(),
+ "LLVM IR generation of declaration");
+ Gen->HandleTagDeclDefinition(D);
+}
- /// Get the best possible source location to represent a diagnostic that
- /// may have associated debug info.
- const FullSourceLoc
- getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithLocationBase &D,
- bool &BadDebugInfo, StringRef &Filename,
- unsigned &Line, unsigned &Column) const;
+void BackendConsumer::HandleTagDeclRequiredDefinition(const TagDecl *D) {
+ Gen->HandleTagDeclRequiredDefinition(D);
+}
- std::optional<FullSourceLoc>
- getFunctionSourceLocation(const Function &F) const;
+void BackendConsumer::CompleteTentativeDefinition(VarDecl *D) {
+ Gen->CompleteTentativeDefinition(D);
+}
- void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI);
- /// Specialized handler for InlineAsm diagnostic.
- /// \return True if the diagnostic has been successfully reported, false
- /// otherwise.
- bool InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D);
- /// Specialized handler for diagnostics reported using SMDiagnostic.
- void SrcMgrDiagHandler(const llvm::DiagnosticInfoSrcMgr &D);
- /// Specialized handler for StackSize diagnostic.
- /// \return True if the diagnostic has been successfully reported, false
- /// otherwise.
- bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D);
- /// Specialized handler for ResourceLimit diagnostic.
- /// \return True if the diagnostic has been successfully reported, false
- /// otherwise.
- bool ResourceLimitDiagHandler(const llvm::DiagnosticInfoResourceLimit &D);
+void BackendConsumer::CompleteExternalDeclaration(VarDecl *D) {
+ Gen->CompleteExternalDeclaration(D);
+}
- /// Specialized handler for unsupported backend feature diagnostic.
- void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D);
- /// Specialized handlers for optimization remarks.
- /// Note that these handlers only accept remarks and they always handle
- /// them.
- void EmitOptimizationMessage(const llvm::DiagnosticInfoOptimizationBase &D,
- unsigned DiagID);
- void
- OptimizationRemarkHandler(const llvm::DiagnosticInfoOptimizationBase &D);
- void OptimizationRemarkHandler(
- const llvm::OptimizationRemarkAnalysisFPCommute &D);
- void OptimizationRemarkHandler(
- const llvm::OptimizationRemarkAnalysisAliasing &D);
- void OptimizationFailureHandler(
- const llvm::DiagnosticInfoOptimizationFailure &D);
- void DontCallDiagHandler(const DiagnosticInfoDontCall &D);
- /// Specialized handler for misexpect warnings.
- /// Note that misexpect remarks are emitted through ORE
- void MisExpectDiagHandler(const llvm::DiagnosticInfoMisExpect &D);
- };
+void BackendConsumer::AssignInheritanceModel(CXXRecordDecl *RD) {
+ Gen->AssignInheritanceModel(RD);
+}
- void BackendConsumer::anchor() {}
+void BackendConsumer::HandleVTable(CXXRecordDecl *RD) {
+ Gen->HandleVTable(RD);
}
+void BackendConsumer::anchor() { }
+
+} // namespace clang
+
bool ClangDiagnosticHandler::handleDiagnostics(const DiagnosticInfo &DI) {
BackendCon->DiagnosticHandlerImpl(DI);
return true;
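// Editor's sketch, not part of the commit: the new LinkInModules keeps the
// -mlink-builtin-bitcode modules reusable when relinking after optimization
// is requested, by linking a clone instead of moving the original into the
// linker. A minimal model of that pattern against the real LLVM API:

#include "llvm/IR/Module.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Support/Error.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <memory>

static bool linkPreservingOriginal(llvm::Module &Dest,
                                   std::unique_ptr<llvm::Module> &Src,
                                   bool PreserveSrc) {
  if (PreserveSrc) {
    // CloneModule needs a fully materialized module, which is why the hunk
    // above calls materializeAll() before cloning.
    if (llvm::Error E = Src->materializeAll()) {
      llvm::consumeError(std::move(E));
      return true; // true == error, matching Linker::linkModules
    }
    return llvm::Linker::linkModules(Dest, llvm::CloneModule(*Src));
  }
  // Destructive link: Src is consumed and cannot be linked again.
  return llvm::Linker::linkModules(Dest, std::move(Src));
}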
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index fab70b66d1d9..2199d7b58fb9 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -495,12 +495,12 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
if (CurFnInfo->getMaxVectorWidth() > LargestVectorWidth)
LargestVectorWidth = CurFnInfo->getMaxVectorWidth();
- // Add the required-vector-width attribute. This contains the max width from:
+ // Add the min-legal-vector-width attribute. This contains the max width from:
// 1. min-vector-width attribute used in the source program.
// 2. Any builtins used that have a vector width specified.
// 3. Values passed in and out of inline assembly.
// 4. Width of vector arguments and return types for this function.
- // 5. Width of vector aguments and return types for functions called by this
+ // 5. Width of vector arguments and return types for functions called by this
// function.
if (getContext().getTargetInfo().getTriple().isX86())
CurFn->addFnAttr("min-legal-vector-width",
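// Editor's example, not part of the commit: item 1 in the list above refers
// to clang's min_vector_width attribute; the largest of the five sources is
// what lands in the "min-legal-vector-width" IR attribute on x86.

typedef int v16si __attribute__((vector_size(64))); // 512-bit vector

__attribute__((__min_vector_width__(512)))
v16si add512(v16si A, v16si B) {
  // Sources 1 (the attribute) and 4 (512-bit argument/return types) both
  // imply "min-legal-vector-width"="512" for this function.
  return A + B;
}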
@@ -572,11 +572,11 @@ llvm::ConstantInt *
CodeGenFunction::getUBSanFunctionTypeHash(QualType Ty) const {
// Remove any (C++17) exception specifications, to allow calling e.g. a
// noexcept function through a non-noexcept pointer.
- if (!isa<FunctionNoProtoType>(Ty))
+ if (!Ty->isFunctionNoProtoType())
Ty = getContext().getFunctionTypeWithExceptionSpec(Ty, EST_None);
std::string Mangled;
llvm::raw_string_ostream Out(Mangled);
- CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out, false);
+ CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out, false);
return llvm::ConstantInt::get(
CGM.Int32Ty, static_cast<uint32_t>(llvm::xxh3_64bits(Mangled)));
}
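// Editor's sketch, not part of the commit: the -fsanitize=function check
// compares the low 32 bits of xxHash3 over the canonically mangled function
// type, so independently compiled TUs agree on the value. Standalone model:

#include "llvm/Support/xxhash.h"
#include <cstdint>
#include <string>

uint32_t functionTypeHash(const std::string &CanonicalMangledType) {
  // llvm::xxh3_64bits hashes the bytes of the mangled type; truncating to
  // 32 bits matches the i32 constant emitted above.
  return static_cast<uint32_t>(llvm::xxh3_64bits(CanonicalMangledType));
}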
@@ -683,6 +683,19 @@ static bool matchesStlAllocatorFn(const Decl *D, const ASTContext &Ctx) {
return true;
}
+bool CodeGenFunction::isInAllocaArgument(CGCXXABI &ABI, QualType Ty) {
+ const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl();
+ return RD && ABI.getRecordArgABI(RD) == CGCXXABI::RAA_DirectInMemory;
+}
+
+bool CodeGenFunction::hasInAllocaArg(const CXXMethodDecl *MD) {
+ return getTarget().getTriple().getArch() == llvm::Triple::x86 &&
+ getTarget().getCXXABI().isMicrosoft() &&
+ llvm::any_of(MD->parameters(), [&](ParmVarDecl *P) {
+ return isInAllocaArgument(CGM.getCXXABI(), P->getType());
+ });
+}
+
/// Return the UBSan prologue signature for \p FD if one is available.
static llvm::Constant *getPrologueSignature(CodeGenModule &CGM,
const FunctionDecl *FD) {
@@ -1108,11 +1121,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
Address(&*AI, ConvertType(RetTy),
CurFnInfo->getReturnInfo().getIndirectAlign(), KnownNonNull);
if (!CurFnInfo->getReturnInfo().getIndirectByVal()) {
- ReturnValuePointer =
- CreateDefaultAlignTempAlloca(Int8PtrTy, "result.ptr");
- Builder.CreateStore(Builder.CreatePointerBitCastOrAddrSpaceCast(
- ReturnValue.getPointer(), Int8PtrTy),
- ReturnValuePointer);
+ ReturnValuePointer = CreateDefaultAlignTempAlloca(
+ ReturnValue.getPointer()->getType(), "result.ptr");
+ Builder.CreateStore(ReturnValue.getPointer(), ReturnValuePointer);
}
} else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::InAlloca &&
!hasScalarEvaluationKind(CurFnInfo->getReturnType())) {
@@ -1154,12 +1165,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
EmitFunctionProlog(*CurFnInfo, CurFn, Args);
- if (isa_and_nonnull<CXXMethodDecl>(D) &&
- cast<CXXMethodDecl>(D)->isInstance()) {
- CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
- const CXXMethodDecl *MD = cast<CXXMethodDecl>(D);
- if (MD->getParent()->isLambda() &&
- MD->getOverloadedOperator() == OO_Call) {
+ if (const CXXMethodDecl *MD = dyn_cast_if_present<CXXMethodDecl>(D);
+ MD && !MD->isStatic()) {
+ bool IsInLambda =
+ MD->getParent()->isLambda() && MD->getOverloadedOperator() == OO_Call;
+ if (MD->isImplicitObjectMemberFunction())
+ CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
+ if (IsInLambda) {
// We're in a lambda; figure out the captures.
MD->getParent()->getCaptureFields(LambdaCaptureFields,
LambdaThisCaptureField);
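// Editor's example, not part of the commit: the switch from isInstance() to
// isImplicitObjectMemberFunction() matters for C++23 explicit object member
// functions, which are non-static yet have no implicit 'this' and therefore
// must not get the instance-function prolog.

struct Widget {
  void classic();                 // implicit object member: receives 'this'
  void modern(this Widget &Self); // explicit object member: no 'this';
                                  // 'Self' is an ordinary parameter
  static void helper();           // static: no object parameter at all
};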
@@ -1189,7 +1201,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
VLASizeMap[VAT->getSizeExpr()] = ExprArg;
}
}
- } else {
+ } else if (MD->isImplicitObjectMemberFunction()) {
// Not in a lambda; just use 'this' from the method.
// FIXME: Should we generate a new load for each use of 'this'? The
// fast register allocator would be happier...
@@ -1202,11 +1214,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
SkippedChecks.set(SanitizerKind::ObjectSize, true);
QualType ThisTy = MD->getThisType();
- // If this is the call operator of a lambda with no capture-default, it
+ // If this is the call operator of a lambda with no captures, it
// may have a static invoker function, which may call this operator with
// a null 'this' pointer.
- if (isLambdaCallOperator(MD) &&
- MD->getParent()->getLambdaCaptureDefault() == LCD_None)
+ if (isLambdaCallOperator(MD) && MD->getParent()->isCapturelessLambda())
SkippedChecks.set(SanitizerKind::Null, true);
EmitTypeCheck(
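// Editor's example, not part of the commit: only a genuinely captureless
// lambda converts to a function pointer, so only its call operator can be
// entered through the static invoker with a null 'this'. The old LCD_None
// test also matched init-capture lambdas, which have no capture-default yet
// do capture:

auto A = [](int X) { return X + 1; };      // captureless: conversion exists
int (*Fp)(int) = A;                        // invoker may pass a null 'this'
auto B = [Y = 2](int X) { return X + Y; }; // LCD_None but captures 'Y':
                                           // no conversion, 'this' is real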
@@ -1249,11 +1260,6 @@ void CodeGenFunction::EmitFunctionBody(const Stmt *Body) {
EmitCompoundStmtWithoutScope(*S);
else
EmitStmt(Body);
-
- // This is checked after emitting the function body so we know if there
- // are any permitted infinite loops.
- if (checkIfFunctionMustProgress())
- CurFn->addFnAttr(llvm::Attribute::MustProgress);
}
/// When instrumenting to collect profile data, the counts for some blocks
@@ -1300,7 +1306,7 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
QualType ResTy = FD->getReturnType();
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
- if (MD && MD->isInstance()) {
+ if (MD && MD->isImplicitObjectMemberFunction()) {
if (CGM.getCXXABI().HasThisReturn(GD))
ResTy = MD->getThisType();
else if (CGM.getCXXABI().hasMostDerivedReturn(GD))
@@ -1325,7 +1331,7 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
auto *Implicit = ImplicitParamDecl::Create(
getContext(), Param->getDeclContext(), Param->getLocation(),
- /*Id=*/nullptr, getContext().getSizeType(), ImplicitParamDecl::Other);
+ /*Id=*/nullptr, getContext().getSizeType(), ImplicitParamKind::Other);
SizeArguments[Param] = Implicit;
Args.push_back(Implicit);
}
@@ -1432,6 +1438,11 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
if (Body && isa_and_nonnull<CoroutineBodyStmt>(Body))
llvm::append_range(FnArgs, FD->parameters());
+ // Ensure that the function adheres to the forward progress guarantee, which
+ // is required by certain optimizations.
+ if (checkIfFunctionMustProgress())
+ CurFn->addFnAttr(llvm::Attribute::MustProgress);
+
// Generate the body of the function.
PGO.assignRegionCounters(GD, CurFn);
if (isa<CXXDestructorDecl>(FD))
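// Editor's example, not part of the commit: under the C++ forward progress
// guarantee a side-effect-free infinite loop is undefined behavior, so a
// function marked 'mustprogress' lets LLVM assume its loops terminate;
// checkIfFunctionMustProgress() decides this per language mode, since C
// exempts loops with a constant controlling expression such as 'while (1);'.

int spinForever() {
  while (true) {
  } // C++: the optimizer may delete this loop entirely
  return 0;
}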
@@ -1447,6 +1458,17 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
// The lambda static invoker function is special, because it forwards or
// clones the body of the function call operator (but is actually static).
EmitLambdaStaticInvokeBody(cast<CXXMethodDecl>(FD));
+ } else if (isa<CXXMethodDecl>(FD) &&
+ isLambdaCallOperator(cast<CXXMethodDecl>(FD)) &&
+ !FnInfo.isDelegateCall() &&
+ cast<CXXMethodDecl>(FD)->getParent()->getLambdaStaticInvoker() &&
+ hasInAllocaArg(cast<CXXMethodDecl>(FD))) {
+ // If emitting a lambda with static invoker on X86 Windows, change
+ // the call operator body.
+ // Make sure that this is a call operator with an inalloca arg and check
+ // for delegate call to make sure this is the original call op and not the
+ // new forwarding function for the static invoker.
+ EmitLambdaInAllocaCallOpBody(cast<CXXMethodDecl>(FD));
} else if (FD->isDefaulted() && isa<CXXMethodDecl>(FD) &&
(cast<CXXMethodDecl>(FD)->isCopyAssignmentOperator() ||
cast<CXXMethodDecl>(FD)->isMoveAssignmentOperator())) {
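// Editor's example, not part of the commit: the new branch targets 32-bit
// Microsoft ABI code, where a by-value argument of non-trivial class type
// is passed 'inalloca'. A captureless lambda with such a parameter still
// converts to a function pointer, so its call operator is exactly the body
// being rewritten here:

struct Big {
  Big();
  Big(const Big &); // non-trivial copy => passed inalloca on x86 MSVC
  int Data[16];
};

auto Op = [](Big B) { return B.Data[0]; };
int (*Fp2)(Big) = Op; // the static invoker forwards into the call operator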
@@ -2025,8 +2047,7 @@ CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) {
NullConstant, Twine());
CharUnits NullAlign = DestPtr.getAlignment();
NullVariable->setAlignment(NullAlign.getAsAlign());
- Address SrcPtr(Builder.CreateBitCast(NullVariable, Builder.getInt8PtrTy()),
- Builder.getInt8Ty(), NullAlign);
+ Address SrcPtr(NullVariable, Builder.getInt8Ty(), NullAlign);
if (vla) return emitNonZeroVLAInit(*this, Ty, DestPtr, SrcPtr, SizeVal);
@@ -2465,10 +2486,8 @@ llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Function *AnnotationFn,
const AnnotateAttr *Attr) {
SmallVector<llvm::Value *, 5> Args = {
AnnotatedVal,
- Builder.CreateBitCast(CGM.EmitAnnotationString(AnnotationStr),
- ConstGlobalsPtrTy),
- Builder.CreateBitCast(CGM.EmitAnnotationUnit(Location),
- ConstGlobalsPtrTy),
+ CGM.EmitAnnotationString(AnnotationStr),
+ CGM.EmitAnnotationUnit(Location),
CGM.EmitAnnotationLineNo(Location),
};
if (Attr)
@@ -2478,15 +2497,10 @@ llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Function *AnnotationFn,
void CodeGenFunction::EmitVarAnnotations(const VarDecl *D, llvm::Value *V) {
assert(D->hasAttr<AnnotateAttr>() && "no annotate attribute");
- // FIXME We create a new bitcast for every annotation because that's what
- // llvm-gcc was doing.
- unsigned AS = V->getType()->getPointerAddressSpace();
- llvm::Type *I8PtrTy = Builder.getInt8PtrTy(AS);
for (const auto *I : D->specific_attrs<AnnotateAttr>())
EmitAnnotationCall(CGM.getIntrinsic(llvm::Intrinsic::var_annotation,
- {I8PtrTy, CGM.ConstGlobalsPtrTy}),
- Builder.CreateBitCast(V, I8PtrTy, V->getName()),
- I->getAnnotation(), D->getLocation(), I);
+ {V->getType(), CGM.ConstGlobalsPtrTy}),
+ V, I->getAnnotation(), D->getLocation(), I);
}
Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D,
@@ -2571,10 +2585,15 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
std::string MissingFeature;
llvm::StringMap<bool> CallerFeatureMap;
CGM.getContext().getFunctionFeatureMap(CallerFeatureMap, FD);
+  // When compiling in HipStdPar mode we have to be conservative in rejecting
+  // target-specific features in the FE, and defer the possible error to the
+  // AcceleratorCodeSelection pass, which emits an error precisely when an
+  // unsupported target builtin is referenced by an accelerator executable
+  // function.
+ bool IsHipStdPar = getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice;
if (BuiltinID) {
StringRef FeatureList(CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID));
if (!Builtin::evaluateRequiredTargetFeatures(
- FeatureList, CallerFeatureMap)) {
+ FeatureList, CallerFeatureMap) && !IsHipStdPar) {
CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature)
<< TargetDecl->getDeclName()
<< FeatureList;
@@ -2607,7 +2626,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
return false;
}
return true;
- }))
+ }) && !IsHipStdPar)
CGM.getDiags().Report(Loc, diag::err_function_needs_feature)
<< FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature;
} else if (!FD->isMultiVersion() && FD->hasAttr<TargetAttr>()) {
@@ -2616,7 +2635,8 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
for (const auto &F : CalleeFeatureMap) {
if (F.getValue() && (!CallerFeatureMap.lookup(F.getKey()) ||
- !CallerFeatureMap.find(F.getKey())->getValue()))
+ !CallerFeatureMap.find(F.getKey())->getValue()) &&
+ !IsHipStdPar)
CGM.getDiags().Report(Loc, diag::err_function_needs_feature)
<< FD->getDeclName() << TargetDecl->getDeclName() << F.getKey();
}
@@ -2658,8 +2678,15 @@ llvm::Value *CodeGenFunction::FormX86ResolverCondition(
const MultiVersionResolverOption &RO) {
llvm::Value *Condition = nullptr;
- if (!RO.Conditions.Architecture.empty())
- Condition = EmitX86CpuIs(RO.Conditions.Architecture);
+ if (!RO.Conditions.Architecture.empty()) {
+ StringRef Arch = RO.Conditions.Architecture;
+ // If arch= specifies an x86-64 micro-architecture level, test the feature
+ // with __builtin_cpu_supports, otherwise use __builtin_cpu_is.
+ if (Arch.starts_with("x86-64"))
+ Condition = EmitX86CpuSupports({Arch});
+ else
+ Condition = EmitX86CpuIs(Arch);
+ }
if (!RO.Conditions.Features.empty()) {
llvm::Value *FeatureCond = EmitX86CpuSupports(RO.Conditions.Features);
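// Editor's example, not part of the commit: the x86-64 micro-architecture
// levels are feature sets rather than CPU models, so their resolver must
// test __builtin_cpu_supports("x86-64-v3") instead of __builtin_cpu_is():

__attribute__((target_clones("arch=x86-64-v3", "default")))
int dot(const int *A, const int *B, int N) {
  int S = 0;
  for (int I = 0; I < N; ++I)
    S += A[I] * B[I]; // the v3 clone is eligible for AVX2 vectorization
  return S;
}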
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 409f48a04906..618e78809db4 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1250,11 +1250,11 @@ public:
/// destroyed by aggressive peephole optimizations that assume that
/// all uses of a value have been realized in the IR.
class PeepholeProtection {
- llvm::Instruction *Inst;
+ llvm::Instruction *Inst = nullptr;
friend class CodeGenFunction;
public:
- PeepholeProtection() : Inst(nullptr) {}
+ PeepholeProtection() = default;
};
/// A non-RAII class containing all the information about a bound
@@ -1963,6 +1963,9 @@ private:
/// Check if the return value of this function requires sanitization.
bool requiresReturnValueCheck() const;
+ bool isInAllocaArgument(CGCXXABI &ABI, QualType Ty);
+ bool hasInAllocaArg(const CXXMethodDecl *MD);
+
llvm::BasicBlock *TerminateLandingPad = nullptr;
llvm::BasicBlock *TerminateHandler = nullptr;
llvm::SmallVector<llvm::BasicBlock *, 2> TrapBBs;
@@ -2227,10 +2230,17 @@ public:
void EmitBlockWithFallThrough(llvm::BasicBlock *BB, const Stmt *S);
void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator,
- CallArgList &CallArgs);
+ CallArgList &CallArgs,
+ const CGFunctionInfo *CallOpFnInfo = nullptr,
+ llvm::Constant *CallOpFn = nullptr);
void EmitLambdaBlockInvokeBody();
- void EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD);
void EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD);
+ void EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD,
+ CallArgList &CallArgs);
+ void EmitLambdaInAllocaImplFn(const CXXMethodDecl *CallOp,
+ const CGFunctionInfo **ImplFnInfo,
+ llvm::Function **ImplFn);
+ void EmitLambdaInAllocaCallOpBody(const CXXMethodDecl *MD);
void EmitLambdaVLACapture(const VariableArrayType *VAT, LValue LV) {
EmitStoreThroughLValue(RValue::get(VLASizeMap[VAT->getSizeExpr()]), LV);
}
@@ -3012,6 +3022,19 @@ public:
void EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index,
QualType IndexType, bool Accessed);
+ // Find a struct's flexible array member. It may be embedded inside multiple
+ // sub-structs, but must still be the last field.
+ const ValueDecl *FindFlexibleArrayMemberField(ASTContext &Ctx,
+ const RecordDecl *RD);
+
+ /// Find the FieldDecl specified in a FAM's "counted_by" attribute. Returns
+ /// \p nullptr if either the attribute or the field doesn't exist.
+ const ValueDecl *FindCountedByField(const Expr *Base);
+
+ /// Build an expression accessing the "counted_by" field.
+ const Expr *BuildCountedByFieldExpr(const Expr *Base,
+ const ValueDecl *CountedByVD);
+
llvm::Value *EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre);
ComplexPairTy EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
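// Editor's example, not part of the commit: the helpers declared above back
// the 'counted_by' attribute on C99 flexible array members, which lets
// __builtin_dynamic_object_size derive a bound from a sibling count field:

struct Packet {
  unsigned Len;                                // the count field
  int Data[] __attribute__((counted_by(Len))); // FAM bounded by 'Len'
};
// __builtin_dynamic_object_size(P->Data, 1) can now evaluate to
// P->Len * sizeof(int) instead of giving up with (size_t)-1.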
@@ -4007,6 +4030,8 @@ public:
const ObjCIvarDecl *Ivar);
LValue EmitLValueForField(LValue Base, const FieldDecl* Field);
LValue EmitLValueForLambdaField(const FieldDecl *Field);
+ LValue EmitLValueForLambdaField(const FieldDecl *Field,
+ llvm::Value *ThisValue);
/// EmitLValueForFieldInitialization - Like EmitLValueForField, except that
/// if the Field is a reference, this will return the address of the reference
@@ -4262,7 +4287,6 @@ public:
llvm::Value *EmitSVEMaskedStore(const CallExpr *,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
- llvm::Value *EmitTileslice(llvm::Value *Offset, llvm::Value *Base);
llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned BuiltinID);
@@ -4275,20 +4299,31 @@ public:
llvm::Value *EmitSVEStructStore(const SVETypeFlags &TypeFlags,
SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
+ /// FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider
+ /// vector. It extracts the scalable vector from the struct and inserts into
+ /// the wider vector. This avoids the error when allocating space in llvm
+ /// for struct of scalable vectors if a function returns struct.
+ llvm::Value *FormSVEBuiltinResult(llvm::Value *Call);
+
llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
- llvm::Value *EmitSMELd1St1(SVETypeFlags TypeFlags,
+ llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
- llvm::Value *EmitSMEReadWrite(SVETypeFlags TypeFlags,
+ llvm::Value *EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
- llvm::Value *EmitSMEZero(SVETypeFlags TypeFlags,
+ llvm::Value *EmitSMEZero(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
- llvm::Value *EmitSMELdrStr(SVETypeFlags TypeFlags,
+ llvm::Value *EmitSMELdrStr(const SVETypeFlags &TypeFlags,
llvm::SmallVectorImpl<llvm::Value *> &Ops,
unsigned IntID);
+
+ void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E,
+ SmallVectorImpl<llvm::Value *> &Ops,
+ SVETypeFlags TypeFlags);
+
llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
@@ -4299,6 +4334,8 @@ public:
llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
+ const CallExpr *E);
llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
@@ -4306,7 +4343,6 @@ public:
llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue);
- llvm::Value *EmitLoongArchBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
llvm::AtomicOrdering &AO,
llvm::SyncScope::ID &SSID);
@@ -4500,6 +4536,11 @@ public:
void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::FunctionCallee fn,
llvm::Constant *addr);
+ /// Registers the dtor using 'llvm.global_dtors' for platforms that do not
+ /// support an 'atexit()' function.
+ void registerGlobalDtorWithLLVM(const VarDecl &D, llvm::FunctionCallee fn,
+ llvm::Constant *addr);
+
/// Call atexit() with function dtorStub.
void registerGlobalDtorWithAtExit(llvm::Constant *dtorStub);
@@ -4789,6 +4830,9 @@ private:
llvm::Value *EmittedE,
bool IsDynamic);
+ llvm::Value *emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
+ llvm::IntegerType *ResType);
+
void emitZeroOrPatternForAutoVarInit(QualType type, const VarDecl &D,
Address Loc);
@@ -4888,7 +4932,7 @@ private:
llvm::Value *EmitX86CpuIs(StringRef CPUStr);
llvm::Value *EmitX86CpuSupports(const CallExpr *E);
llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs);
- llvm::Value *EmitX86CpuSupports(uint64_t Mask);
+ llvm::Value *EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask);
llvm::Value *EmitX86CpuInit();
llvm::Value *FormX86ResolverCondition(const MultiVersionResolverOption &RO);
llvm::Value *EmitAArch64CpuInit();
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 07a9dec12f6f..b931a81bc008 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -28,6 +28,7 @@
#include "CoverageMappingGen.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
+#include "clang/AST/ASTLambda.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
@@ -360,13 +361,14 @@ CodeGenModule::CodeGenModule(ASTContext &C,
IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth());
IntPtrTy = llvm::IntegerType::get(LLVMContext,
C.getTargetInfo().getMaxPointerWidth());
- Int8PtrTy = Int8Ty->getPointerTo(0);
- Int8PtrPtrTy = Int8PtrTy->getPointerTo(0);
+ Int8PtrTy = llvm::PointerType::get(LLVMContext, 0);
const llvm::DataLayout &DL = M.getDataLayout();
- AllocaInt8PtrTy = Int8Ty->getPointerTo(DL.getAllocaAddrSpace());
- GlobalsInt8PtrTy = Int8Ty->getPointerTo(DL.getDefaultGlobalsAddressSpace());
- ConstGlobalsPtrTy = Int8Ty->getPointerTo(
- C.getTargetAddressSpace(GetGlobalConstantAddressSpace()));
+ AllocaInt8PtrTy =
+ llvm::PointerType::get(LLVMContext, DL.getAllocaAddrSpace());
+ GlobalsInt8PtrTy =
+ llvm::PointerType::get(LLVMContext, DL.getDefaultGlobalsAddressSpace());
+ ConstGlobalsPtrTy = llvm::PointerType::get(
+ LLVMContext, C.getTargetAddressSpace(GetGlobalConstantAddressSpace()));
ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace();
// Build C++20 Module initializers.
@@ -563,8 +565,8 @@ static const llvm::GlobalValue *getAliasedGlobal(const llvm::GlobalValue *GV) {
}
static bool checkAliasedGlobal(
- DiagnosticsEngine &Diags, SourceLocation Location, bool IsIFunc,
- const llvm::GlobalValue *Alias, const llvm::GlobalValue *&GV,
+ const ASTContext &Context, DiagnosticsEngine &Diags, SourceLocation Location,
+ bool IsIFunc, const llvm::GlobalValue *Alias, const llvm::GlobalValue *&GV,
const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames,
SourceRange AliasRange) {
GV = getAliasedGlobal(Alias);
@@ -573,6 +575,14 @@ static bool checkAliasedGlobal(
return false;
}
+ if (GV->hasCommonLinkage()) {
+ const llvm::Triple &Triple = Context.getTargetInfo().getTriple();
+ if (Triple.getObjectFormat() == llvm::Triple::XCOFF) {
+ Diags.Report(Location, diag::err_alias_to_common);
+ return false;
+ }
+ }
+
if (GV->isDeclaration()) {
Diags.Report(Location, diag::err_alias_to_undefined) << IsIFunc << IsIFunc;
Diags.Report(Location, diag::note_alias_requires_mangled_name)
@@ -633,7 +643,7 @@ void CodeGenModule::checkAliases() {
StringRef MangledName = getMangledName(GD);
llvm::GlobalValue *Alias = GetGlobalValue(MangledName);
const llvm::GlobalValue *GV = nullptr;
- if (!checkAliasedGlobal(Diags, Location, IsIFunc, Alias, GV,
+ if (!checkAliasedGlobal(getContext(), Diags, Location, IsIFunc, Alias, GV,
MangledDeclNames, Range)) {
Error = true;
continue;
@@ -689,6 +699,7 @@ void CodeGenModule::checkAliases() {
void CodeGenModule::clear() {
DeferredDeclsToEmit.clear();
EmittedDeferredDecls.clear();
+ DeferredAnnotations.clear();
if (OpenMPRuntime)
OpenMPRuntime->clear();
}
@@ -752,6 +763,14 @@ static void setVisibilityFromDLLStorageClass(const clang::LangOptions &LO,
}
}
+static bool isStackProtectorOn(const LangOptions &LangOpts,
+ const llvm::Triple &Triple,
+ clang::LangOptions::StackProtectorMode Mode) {
+ if (Triple.isAMDGPU() || Triple.isNVPTX())
+ return false;
+ return LangOpts.getStackProtector() == Mode;
+}
+
void CodeGenModule::Release() {
Module *Primary = getContext().getCurrentNamedModule();
if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule())
@@ -829,7 +848,7 @@ void CodeGenModule::Release() {
// Emit amdgpu_code_object_version module flag, which is code object version
// times 100.
if (getTarget().getTargetOpts().CodeObjectVersion !=
- TargetOptions::COV_None) {
+ llvm::CodeObjectVersionKind::COV_None) {
getModule().addModuleFlag(llvm::Module::Error,
"amdgpu_code_object_version",
getTarget().getTargetOpts().CodeObjectVersion);
@@ -967,6 +986,41 @@ void CodeGenModule::Release() {
Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity();
getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth);
+ if (getTriple().isOSzOS()) {
+ getModule().addModuleFlag(llvm::Module::Warning,
+ "zos_product_major_version",
+ uint32_t(CLANG_VERSION_MAJOR));
+ getModule().addModuleFlag(llvm::Module::Warning,
+ "zos_product_minor_version",
+ uint32_t(CLANG_VERSION_MINOR));
+ getModule().addModuleFlag(llvm::Module::Warning, "zos_product_patchlevel",
+ uint32_t(CLANG_VERSION_PATCHLEVEL));
+ std::string ProductId;
+#ifdef CLANG_VENDOR
+ ProductId = #CLANG_VENDOR;
+#else
+ ProductId = "clang";
+#endif
+ getModule().addModuleFlag(llvm::Module::Error, "zos_product_id",
+ llvm::MDString::get(VMContext, ProductId));
+
+ // Record the language because we need it for the PPA2.
+ StringRef lang_str = languageToString(
+ LangStandard::getLangStandardForKind(LangOpts.LangStd).Language);
+ getModule().addModuleFlag(llvm::Module::Error, "zos_cu_language",
+ llvm::MDString::get(VMContext, lang_str));
+
+ time_t TT = PreprocessorOpts.SourceDateEpoch
+ ? *PreprocessorOpts.SourceDateEpoch
+ : std::time(nullptr);
+ getModule().addModuleFlag(llvm::Module::Max, "zos_translation_time",
+ static_cast<uint64_t>(TT));
+
+ // Multiple modes will be supported here.
+ getModule().addModuleFlag(llvm::Module::Error, "zos_le_char_mode",
+ llvm::MDString::get(VMContext, "ascii"));
+ }
+
llvm::Triple::ArchType Arch = Context.getTargetInfo().getTriple().getArch();
if ( Arch == llvm::Triple::arm
|| Arch == llvm::Triple::armeb
@@ -1067,6 +1121,15 @@ void CodeGenModule::Release() {
"sign-return-address-with-bkey", 1);
}
+ if (CodeGenOpts.StackClashProtector)
+ getModule().addModuleFlag(
+ llvm::Module::Override, "probe-stack",
+ llvm::MDString::get(TheModule.getContext(), "inline-asm"));
+
+ if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096)
+ getModule().addModuleFlag(llvm::Module::Min, "stack-probe-size",
+ CodeGenOpts.StackProbeSize);
+
if (!CodeGenOpts.MemoryProfileOutput.empty()) {
llvm::LLVMContext &Ctx = TheModule.getContext();
getModule().addModuleFlag(
@@ -1137,6 +1200,12 @@ void CodeGenModule::Release() {
if (CM != ~0u) {
llvm::CodeModel::Model codeModel = static_cast<llvm::CodeModel::Model>(CM);
getModule().setCodeModel(codeModel);
+
+ if (CM == llvm::CodeModel::Medium &&
+ Context.getTargetInfo().getTriple().getArch() ==
+ llvm::Triple::x86_64) {
+ getModule().setLargeDataThreshold(getCodeGenOpts().LargeDataThreshold);
+ }
}
}
@@ -1196,11 +1265,15 @@ void CodeGenModule::Release() {
getModule().setOverrideStackAlignment(getCodeGenOpts().StackAlignment);
if (getCodeGenOpts().SkipRaxSetup)
getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1);
+ if (getLangOpts().RegCall4)
+ getModule().addModuleFlag(llvm::Module::Override, "RegCallv4", 1);
if (getContext().getTargetInfo().getMaxTLSAlign())
getModule().addModuleFlag(llvm::Module::Error, "MaxTLSAlign",
getContext().getTargetInfo().getMaxTLSAlign());
+ getTargetCodeGenInfo().emitTargetGlobals(*this);
+
getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames);
EmitBackendOptionsMetadata(getCodeGenOpts());
@@ -1371,9 +1444,24 @@ void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV,
}
if (!D)
return;
+
// Set visibility for definitions, and for declarations if requested globally
// or set explicitly.
LinkageInfo LV = D->getLinkageAndVisibility();
+
+ // OpenMP declare target variables must be visible to the host so they can
+ // be registered. We require protected visibility unless the variable has
+ // the DT_nohost modifier and does not need to be registered.
+ if (Context.getLangOpts().OpenMP &&
+ Context.getLangOpts().OpenMPIsTargetDevice && isa<VarDecl>(D) &&
+ D->hasAttr<OMPDeclareTargetDeclAttr>() &&
+ D->getAttr<OMPDeclareTargetDeclAttr>()->getDevType() !=
+ OMPDeclareTargetDeclAttr::DT_NoHost &&
+ LV.getVisibility() == HiddenVisibility) {
+ GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
+ return;
+ }
+
if (GV->hasDLLExportStorageClass() || GV->hasDLLImportStorageClass()) {
// Reject incompatible dlllstorage and visibility annotations.
if (!LV.isVisibilityExplicit())
@@ -1407,6 +1495,7 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM,
return false;
const llvm::Triple &TT = CGM.getTriple();
+ const auto &CGOpts = CGM.getCodeGenOpts();
if (TT.isWindowsGNUEnvironment()) {
// In MinGW, variables without DLLImport can still be automatically
// imported from a DLL by the linker; don't mark variables that
@@ -1417,7 +1506,8 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM,
// such variables can't be marked as DSO local. (Native TLS variables
// can't be dllimported at all, though.)
if (GV->isDeclarationForLinker() && isa<llvm::GlobalVariable>(GV) &&
- (!GV->isThreadLocal() || CGM.getCodeGenOpts().EmulatedTLS))
+ (!GV->isThreadLocal() || CGM.getCodeGenOpts().EmulatedTLS) &&
+ CGOpts.AutoImport)
return false;
}
@@ -1440,7 +1530,6 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM,
return false;
// If this is not an executable, don't assume anything is local.
- const auto &CGOpts = CGM.getCodeGenOpts();
llvm::Reloc::Model RM = CGOpts.RelocationModel;
const auto &LOpts = CGM.getLangOpts();
if (RM != llvm::Reloc::Static && !LOpts.PIE) {
@@ -1707,7 +1796,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
if (FD &&
FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) {
- Out << "__regcall3__" << II->getName();
+ if (CGM.getLangOpts().RegCall4)
+ Out << "__regcall4__" << II->getName();
+ else
+ Out << "__regcall3__" << II->getName();
} else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
Out << "__device_stub__" << II->getName();
@@ -1936,9 +2028,9 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) {
for (const auto &I : Fns) {
auto ctor = ctors.beginStruct(CtorStructTy);
ctor.addInt(Int32Ty, I.Priority);
- ctor.add(llvm::ConstantExpr::getBitCast(I.Initializer, CtorPFTy));
+ ctor.add(I.Initializer);
if (I.AssociatedData)
- ctor.add(llvm::ConstantExpr::getBitCast(I.AssociatedData, VoidPtrTy));
+ ctor.add(I.AssociatedData);
else
ctor.addNullPointer(VoidPtrTy);
ctor.finishAndAddTo(ctors);
@@ -1965,16 +2057,7 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) {
if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(D))
return getCXXABI().getCXXDestructorLinkage(Linkage, Dtor, GD.getDtorType());
- if (isa<CXXConstructorDecl>(D) &&
- cast<CXXConstructorDecl>(D)->isInheritingConstructor() &&
- Context.getTargetInfo().getCXXABI().isMicrosoft()) {
- // Our approach to inheriting constructors is fundamentally different from
- // that used by the MS ABI, so keep our inheriting constructor thunks
- // internal rather than trying to pick an unambiguous mangling for them.
- return llvm::GlobalValue::InternalLinkage;
- }
-
- return getLLVMLinkageForDeclarator(D, Linkage, /*IsConstantVariable=*/false);
+ return getLLVMLinkageForDeclarator(D, Linkage);
}
llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
@@ -1992,7 +2075,7 @@ llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) {
std::string OutName;
llvm::raw_string_ostream Out(OutName);
- getCXXABI().getMangleContext().mangleTypeName(
+ getCXXABI().getMangleContext().mangleCanonicalTypeName(
T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers);
if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers)
@@ -2232,11 +2315,11 @@ static bool requiresMemberFunctionPointerTypeMetadata(CodeGenModule &CGM,
// Only functions whose address can be taken with a member function pointer
// need this sort of type metadata.
- return !MD->isStatic() && !MD->isVirtual() && !isa<CXXConstructorDecl>(MD) &&
- !isa<CXXDestructorDecl>(MD);
+ return MD->isImplicitObjectMemberFunction() && !MD->isVirtual() &&
+ !isa<CXXConstructorDecl, CXXDestructorDecl>(MD);
}
-std::vector<const CXXRecordDecl *>
+SmallVector<const CXXRecordDecl *, 0>
CodeGenModule::getMostBaseClasses(const CXXRecordDecl *RD) {
llvm::SetVector<const CXXRecordDecl *> MostBases;
@@ -2261,19 +2344,23 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
if (CodeGenOpts.StackClashProtector)
B.addAttribute("probe-stack", "inline-asm");
+ if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096)
+ B.addAttribute("stack-probe-size",
+ std::to_string(CodeGenOpts.StackProbeSize));
+
if (!hasUnwindExceptions(LangOpts))
B.addAttribute(llvm::Attribute::NoUnwind);
if (D && D->hasAttr<NoStackProtectorAttr>())
; // Do nothing.
else if (D && D->hasAttr<StrictGuardStackCheckAttr>() &&
- LangOpts.getStackProtector() == LangOptions::SSPOn)
+ isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPOn))
B.addAttribute(llvm::Attribute::StackProtectStrong);
- else if (LangOpts.getStackProtector() == LangOptions::SSPOn)
+ else if (isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPOn))
B.addAttribute(llvm::Attribute::StackProtect);
- else if (LangOpts.getStackProtector() == LangOptions::SSPStrong)
+ else if (isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPStrong))
B.addAttribute(llvm::Attribute::StackProtectStrong);
- else if (LangOpts.getStackProtector() == LangOptions::SSPReq)
+ else if (isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPReq))
B.addAttribute(llvm::Attribute::StackProtectReq);
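
The stack-protector hunk routes every mode comparison through the
isStackProtectorOn helper defined earlier in this file. A plausible shape of
that helper, shown only as a sketch (the GPU-triple filter is an assumption
of this example, not quoted from the patch):

    #include "clang/Basic/LangOptions.h"
    #include "llvm/TargetParser/Triple.h"

    // Same mode comparison as the old code, with a target filter in front:
    // on targets without a usable stack, the protector is never requested.
    static bool isStackProtectorOnSketch(
        const clang::LangOptions &LangOpts, const llvm::Triple &Triple,
        clang::LangOptions::StackProtectorMode Mode) {
      if (Triple.isAMDGPU() || Triple.isNVPTX())
        return false;
      return LangOpts.getStackProtector() == Mode;
    }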
if (!D) {
@@ -2288,6 +2375,14 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
return;
}
+ // Handle SME attributes that apply to function definitions,
+ // rather than to function prototypes.
+ if (D->hasAttr<ArmLocallyStreamingAttr>())
+ B.addAttribute("aarch64_pstate_sm_body");
+
+ if (D->hasAttr<ArmNewZAAttr>())
+ B.addAttribute("aarch64_pstate_za_new");
+
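
The new block lowers AArch64 SME attributes that only make sense on a
definition. As source-level input, the ACLE keyword behind
ArmLocallyStreamingAttr looks like this (requires a target with +sme):

    // Lowered by the hunk above to the "aarch64_pstate_sm_body" IR
    // attribute, so the backend switches streaming mode around the body.
    __arm_locally_streaming void streaming_kernel(void) {}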
// Track whether we need to add the optnone LLVM attribute,
// starting with the default for this optimization level.
bool ShouldAddOptNone =
@@ -2386,7 +2481,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
// functions. If the current target's C++ ABI requires this and this is a
// member function, set its alignment accordingly.
if (getTarget().getCXXABI().areMemberFunctionsAligned()) {
- if (F->getPointerAlignment(getDataLayout()) < 2 && isa<CXXMethodDecl>(D))
+ if (isa<CXXMethodDecl>(D) && F->getPointerAlignment(getDataLayout()) < 2)
F->setAlignment(std::max(llvm::Align(2), F->getAlign().valueOrOne()));
}
@@ -2893,6 +2988,9 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod,
}
void CodeGenModule::EmitModuleInitializers(clang::Module *Primary) {
+ assert(Primary->isNamedModuleUnit() &&
+ "We should only emit module initializers for named modules.");
+
// Emit the initializers in the order that sub-modules appear in the
// source, first Global Module Fragments, if present.
if (auto GMF = Primary->getGlobalModuleFragment()) {
@@ -2913,6 +3011,9 @@ void CodeGenModule::EmitModuleInitializers(clang::Module *Primary) {
// Third, any associated with the Private Module Fragment, if present.
if (auto PMF = Primary->getPrivateModuleFragment()) {
for (Decl *D : getContext().getModuleInitializers(PMF)) {
+ // Skip import decls; the inits for those are called explicitly.
+ if (isa<ImportDecl>(D))
+ continue;
assert(isa<VarDecl>(D) && "PMF initializer decl is not a var?");
EmitTopLevelDecl(D);
}
@@ -3078,6 +3179,13 @@ void CodeGenModule::EmitVTablesOpportunistically() {
}
void CodeGenModule::EmitGlobalAnnotations() {
+ for (const auto& [MangledName, VD] : DeferredAnnotations) {
+ llvm::GlobalValue *GV = GetGlobalValue(MangledName);
+ if (GV)
+ AddGlobalAnnotations(VD, GV);
+ }
+ DeferredAnnotations.clear();
+
if (Annotations.empty())
return;
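
The loop added to EmitGlobalAnnotations flushes annotations that were
attached to a redeclaration seen after the function had already been created
(the map itself is filled in GetOrCreateLLVMFunction and EmitGlobal further
down). A minimal reproducer:

    void f(void);                         // plain first declaration
    void call_site(void) { f(); }         // forces creation of f's IR
    __attribute__((annotate("checked")))  // annotation arrives only now;
    void f(void) {}                       // the deferred map still emits it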
@@ -3150,10 +3258,9 @@ llvm::Constant *CodeGenModule::EmitAnnotationArgs(const AnnotateAttr *Attr) {
".args");
GV->setSection(AnnotationSection);
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
- auto *Bitcasted = llvm::ConstantExpr::getBitCast(GV, GlobalsInt8PtrTy);
- Lookup = Bitcasted;
- return Bitcasted;
+ Lookup = GV;
+ return GV;
}
llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV,
@@ -3169,17 +3276,14 @@ llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV,
if (GV->getAddressSpace() !=
getDataLayout().getDefaultGlobalsAddressSpace()) {
GVInGlobalsAS = llvm::ConstantExpr::getAddrSpaceCast(
- GV, GV->getValueType()->getPointerTo(
- getDataLayout().getDefaultGlobalsAddressSpace()));
+ GV,
+ llvm::PointerType::get(
+ GV->getContext(), getDataLayout().getDefaultGlobalsAddressSpace()));
}
// Create the ConstantStruct for the global annotation.
llvm::Constant *Fields[] = {
- llvm::ConstantExpr::getBitCast(GVInGlobalsAS, GlobalsInt8PtrTy),
- llvm::ConstantExpr::getBitCast(AnnoGV, ConstGlobalsPtrTy),
- llvm::ConstantExpr::getBitCast(UnitGV, ConstGlobalsPtrTy),
- LineNoCst,
- Args,
+ GVInGlobalsAS, AnnoGV, UnitGV, LineNoCst, Args,
};
return llvm::ConstantStruct::getAnon(Fields);
}
@@ -3200,7 +3304,7 @@ bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind, llvm::Function *Fn,
return true;
// NoSanitize by location. Check "mainfile" prefix.
auto &SM = Context.getSourceManager();
- const FileEntry &MainFile = *SM.getFileEntryForID(SM.getMainFileID());
+ FileEntryRef MainFile = *SM.getFileEntryRefForID(SM.getMainFileID());
if (NoSanitizeL.containsMainFile(Kind, MainFile.getName()))
return true;
@@ -3221,7 +3325,8 @@ bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind,
return true;
auto &SM = Context.getSourceManager();
if (NoSanitizeL.containsMainFile(
- Kind, SM.getFileEntryForID(SM.getMainFileID())->getName(), Category))
+ Kind, SM.getFileEntryRefForID(SM.getMainFileID())->getName(),
+ Category))
return true;
if (NoSanitizeL.containsLocation(Kind, Loc, Category))
return true;
@@ -3287,7 +3392,7 @@ CodeGenModule::isFunctionBlockedByProfileList(llvm::Function *Fn,
// If location is unknown, this may be a compiler-generated function. Assume
// it's located in the main file.
auto &SM = Context.getSourceManager();
- if (const auto *MainFile = SM.getFileEntryForID(SM.getMainFileID()))
+ if (auto MainFile = SM.getFileEntryRefForID(SM.getMainFileID()))
if (auto V = ProfileList.isFileExcluded(MainFile->getName(), Kind))
return *V;
return ProfileList.getDefault(Kind);
@@ -3364,7 +3469,7 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
// codegen for global variables, because they may be marked as threadprivate.
if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS &&
getContext().getTargetInfo().isTLSSupported() && isa<VarDecl>(Global) &&
- !isTypeConstant(Global->getType(), false, false) &&
+ !Global->getType().isConstantStorage(getContext(), false, false) &&
!OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Global))
return false;
@@ -3419,9 +3524,7 @@ ConstantAddress CodeGenModule::GetAddrOfMSGuidDecl(const MSGuidDecl *GD) {
}
llvm::Type *Ty = getTypes().ConvertTypeForMem(GD->getType());
- llvm::Constant *Addr = llvm::ConstantExpr::getBitCast(
- GV, Ty->getPointerTo(GV->getAddressSpace()));
- return ConstantAddress(Addr, Ty, Alignment);
+ return ConstantAddress(GV, Ty, Alignment);
}
ConstantAddress CodeGenModule::GetAddrOfUnnamedGlobalConstantDecl(
@@ -3483,7 +3586,7 @@ ConstantAddress CodeGenModule::GetAddrOfTemplateParamObject(
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
Emitter.finalize(GV);
- return ConstantAddress(GV, GV->getValueType(), Alignment);
+ return ConstantAddress(GV, GV->getValueType(), Alignment);
}
ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) {
@@ -3495,11 +3598,8 @@ ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) {
// See if there is already something with the target's name in the module.
llvm::GlobalValue *Entry = GetGlobalValue(AA->getAliasee());
- if (Entry) {
- unsigned AS = getTypes().getTargetAddressSpace(VD->getType());
- auto Ptr = llvm::ConstantExpr::getBitCast(Entry, DeclTy->getPointerTo(AS));
- return ConstantAddress(Ptr, DeclTy, Alignment);
- }
+ if (Entry)
+ return ConstantAddress(Entry, DeclTy, Alignment);
llvm::Constant *Aliasee;
if (isa<llvm::FunctionType>(DeclTy))
@@ -3517,6 +3617,14 @@ ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) {
return ConstantAddress(Aliasee, DeclTy, Alignment);
}
+template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) {
+ if (!D)
+ return false;
+ if (auto *A = D->getAttr<AttrT>())
+ return A->isImplicit();
+ return D->isImplicit();
+}
+
void CodeGenModule::EmitGlobal(GlobalDecl GD) {
const auto *Global = cast<ValueDecl>(GD.getDecl());
@@ -3538,14 +3646,24 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
return emitCPUDispatchDefinition(GD);
// If this is CUDA, be selective about which declarations we emit.
+ // Non-constexpr, non-lambda implicit host-device functions are not emitted
+ // unless they are used on the device side.
if (LangOpts.CUDA) {
if (LangOpts.CUDAIsDevice) {
- if (!Global->hasAttr<CUDADeviceAttr>() &&
+ const auto *FD = dyn_cast<FunctionDecl>(Global);
+ if ((!Global->hasAttr<CUDADeviceAttr>() ||
+ (LangOpts.OffloadImplicitHostDeviceTemplates && FD &&
+ hasImplicitAttr<CUDAHostAttr>(FD) &&
+ hasImplicitAttr<CUDADeviceAttr>(FD) && !FD->isConstexpr() &&
+ !isLambdaCallOperator(FD) &&
+ !getContext().CUDAImplicitHostDeviceFunUsedByDevice.count(FD))) &&
!Global->hasAttr<CUDAGlobalAttr>() &&
!Global->hasAttr<CUDAConstantAttr>() &&
!Global->hasAttr<CUDASharedAttr>() &&
!Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
- !Global->getType()->isCUDADeviceBuiltinTextureType())
+ !Global->getType()->isCUDADeviceBuiltinTextureType() &&
+ !(LangOpts.HIPStdPar && isa<FunctionDecl>(Global) &&
+ !Global->hasAttr<CUDAHostAttr>()))
return;
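
Together with the hasImplicitAttr<> helper added just above, this keeps
functions that are only implicitly host+device (as produced under
-foffload-implicit-host-device-templates) out of the device IR unless the
device actually uses them. Sketch in CUDA source:

    // Implicitly __host__ __device__ under the option, with no annotations.
    template <class T> T twice(T v) { return v + v; }

    __global__ void kern(int *out) { *out = twice(*out); } // device use:
                                                           // emitted
    int host_use(int x) { return twice(x); } // host-only use: skipped when
                                             // compiling for the device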
} else {
// We need to emit host-side 'shadows' for all global
@@ -3581,6 +3699,14 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
// Ignore declarations, they will be emitted on their first use.
if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
+ // Update deferred annotations with the latest declaration if the function
+ // was already used or defined.
+ if (FD->hasAttr<AnnotateAttr>()) {
+ StringRef MangledName = getMangledName(GD);
+ if (GetGlobalValue(MangledName))
+ DeferredAnnotations[MangledName] = FD;
+ }
+
// Forward declarations are emitted lazily on first use.
if (!FD->doesThisDeclarationHaveABody()) {
if (!FD->doesDeclarationForceExternallyVisibleDefinition())
@@ -3605,6 +3731,13 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
// Emit declaration of the must-be-emitted declare target variable.
if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
+
+ // If this variable has external storage and doesn't require special
+ // link handling, we defer to its canonical definition.
+ if (VD->hasExternalStorage() &&
+ Res != OMPDeclareTargetDeclAttr::MT_Link)
+ return;
+
bool UnifiedMemoryEnabled =
getOpenMPRuntime().hasRequiresUnifiedSharedMemory();
if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
@@ -3638,6 +3771,7 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
if (MustBeEmitted(Global) && MayBeEmittedEagerly(Global)) {
// Emit the definition if it can't be deferred.
EmitGlobalDefinition(GD);
+ addEmittedDeferredDecl(GD);
return;
}
@@ -3657,7 +3791,6 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
// The value must be emitted, but cannot be emitted eagerly.
assert(!MayBeEmittedEagerly(Global));
addDeferredDeclToEmit(GD);
- EmittedDeferredDecls[MangledName] = GD;
} else {
// Otherwise, remember that we saw a deferred decl with this name. The
// first use of the mangled name will cause it to move into
@@ -3798,10 +3931,22 @@ CodeGenModule::isTriviallyRecursive(const FunctionDecl *FD) {
bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage)
return true;
+
const auto *F = cast<FunctionDecl>(GD.getDecl());
if (CodeGenOpts.OptimizationLevel == 0 && !F->hasAttr<AlwaysInlineAttr>())
return false;
+ // We don't import function bodies from other named module units since that
+ // behavior may break ABI compatibility of the current unit.
+ if (const Module *M = F->getOwningModule();
+ M && M->getTopLevelModule()->isNamedModule() &&
+ getContext().getCurrentNamedModule() != M->getTopLevelModule() &&
+ !F->hasAttr<AlwaysInlineAttr>())
+ return false;
+
+ if (F->hasAttr<NoInlineAttr>())
+ return false;
+
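
The new named-module check means available_externally bodies are no longer
imported across module-unit boundaries (absent always_inline), so inlining
cannot bake a stale copy of another module's implementation into this unit;
noinline functions are likewise never worth emitting. Two-file sketch:

    // m.cppm
    export module m;
    export int answer() { return 42; }

    // user.cpp: after this change, answer() remains a call into m's object
    // code instead of an available_externally copy emitted here.
    import m;
    int use() { return answer(); }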
if (F->hasAttr<DLLImportAttr>() && !F->hasAttr<AlwaysInlineAttr>()) {
// Check whether it would be safe to inline this dllimport function.
DLLImportFunctionVisitor Visitor;
@@ -3936,7 +4081,7 @@ TargetMVPriority(const TargetInfo &TI,
llvm::GlobalValue::LinkageTypes getMultiversionLinkage(CodeGenModule &CGM,
GlobalDecl GD) {
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
- if (FD->getFormalLinkage() == InternalLinkage)
+ if (FD->getFormalLinkage() == Linkage::Internal)
return llvm::GlobalValue::InternalLinkage;
return llvm::GlobalValue::WeakODRLinkage;
}
@@ -4033,13 +4178,34 @@ void CodeGenModule::emitMultiVersionFunctions() {
}
llvm::Constant *ResolverConstant = GetOrCreateMultiVersionResolver(GD);
- if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant))
+ if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant)) {
ResolverConstant = IFunc->getResolver();
+ // On AArch64, default versions of multiversioned functions are mangled to
+ // their 'normal' assembly name. This deviates from other targets, which
+ // append a '.default' string. As a result we need to continue appending
+ // .ifunc on AArch64.
+ // FIXME: Should the AArch64 mangling for 'default' multiversion functions,
+ // and in turn the ifunc name, match that of other targets?
+ if (FD->isTargetClonesMultiVersion() &&
+ !getTarget().getTriple().isAArch64()) {
+ const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
+ llvm::FunctionType *DeclTy = getTypes().GetFunctionType(FI);
+ std::string MangledName = getMangledNameImpl(
+ *this, GD, FD, /*OmitMultiVersionMangling=*/true);
+ // In prior versions of Clang, the mangling for ifuncs incorrectly
+ // included an .ifunc suffix. This alias is generated for backward
+ // compatibility. It is deprecated, and may be removed in the future.
+ auto *Alias = llvm::GlobalAlias::create(
+ DeclTy, 0, getMultiversionLinkage(*this, GD),
+ MangledName + ".ifunc", IFunc, &getModule());
+ SetCommonAttributes(FD, Alias);
+ }
+ }
llvm::Function *ResolverFunc = cast<llvm::Function>(ResolverConstant);
ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD));
- if (supportsCOMDAT())
+ if (!ResolverFunc->hasLocalLinkage() && supportsCOMDAT())
ResolverFunc->setComdat(
getModule().getOrInsertComdat(ResolverFunc->getName()));
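
For target_clones on non-AArch64 targets, the primary ifunc now takes the
plain mangled name; the '.ifunc'-suffixed symbol that older Clang emitted is
kept only as the deprecated compatibility alias created above, and a COMDAT
is now skipped for local-linkage resolvers. For example:

    // On x86 this emits an ifunc "dispatch" plus the backward-compatibility
    // alias "dispatch.ifunc" pointing at the same resolver.
    __attribute__((target_clones("avx2", "default")))
    int dispatch(int x) { return x + 1; }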
@@ -4148,8 +4314,9 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
// always run on at least a 'pentium'). We do this by deleting the 'least
// advanced' (read, lowest mangling letter).
while (Options.size() > 1 &&
- llvm::X86::getCpuSupportsMask(
- (Options.end() - 2)->Conditions.Features) == 0) {
+ llvm::all_of(llvm::X86::getCpuSupportsMask(
+ (Options.end() - 2)->Conditions.Features),
+ [](auto X) { return X == 0; })) {
StringRef LHSName = (Options.end() - 2)->Function->getName();
StringRef RHSName = (Options.end() - 1)->Function->getName();
if (LHSName.compare(RHSName) < 0)
@@ -4200,10 +4367,19 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
// Holds the name of the resolver, in ifunc mode this is the ifunc (which has
// a separate resolver).
std::string ResolverName = MangledName;
- if (getTarget().supportsIFunc())
- ResolverName += ".ifunc";
- else if (FD->isTargetMultiVersion())
+ if (getTarget().supportsIFunc()) {
+ // On AArch64, default versions of multiversioned functions are mangled to
+ // their 'normal' assembly name. This deviates from other targets, which
+ // append a '.default' string. As a result we need to continue appending
+ // .ifunc on AArch64.
+ // FIXME: Should the AArch64 mangling for 'default' multiversion functions,
+ // and in turn the ifunc name, match that of other targets?
+ if (!FD->isTargetClonesMultiVersion() ||
+ getTarget().getTriple().isAArch64())
+ ResolverName += ".ifunc";
+ } else if (FD->isTargetMultiVersion()) {
ResolverName += ".resolver";
+ }
// If the resolver has already been created, just return it.
if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName))
@@ -4325,8 +4501,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
// (If function is requested for a definition, we always need to create a new
// function, not just return a bitcast.)
if (!IsForDefinition)
- return llvm::ConstantExpr::getBitCast(
- Entry, Ty->getPointerTo(Entry->getAddressSpace()));
+ return Entry;
}
// This function doesn't have a complete type (for example, the return
@@ -4346,6 +4521,11 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
llvm::Function::Create(FTy, llvm::Function::ExternalLinkage,
Entry ? StringRef() : MangledName, &getModule());
+ // Store the declaration associated with this function so it is potentially
+ // updated by further declarations or definitions and emitted at the end.
+ if (D && D->hasAttr<AnnotateAttr>())
+ DeferredAnnotations[MangledName] = cast<ValueDecl>(D);
+
// If we already created a function with the same mangled name (but different
// type) before, take its name and add it to the list of functions to be
// replaced with F at the end of CodeGen.
@@ -4366,9 +4546,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
Entry->removeDeadConstantUsers();
}
- llvm::Constant *BC = llvm::ConstantExpr::getBitCast(
- F, Entry->getValueType()->getPointerTo(Entry->getAddressSpace()));
- addGlobalValReplacement(Entry, BC);
+ addGlobalValReplacement(Entry, F);
}
assert(F->getName() == MangledName && "name was uniqued!");
@@ -4397,7 +4575,6 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
// DeferredDeclsToEmit list, and remove it from DeferredDecls (since we
// don't need it anymore).
addDeferredDeclToEmit(DDI->second);
- EmittedDeferredDecls[DDI->first] = DDI->second;
DeferredDecls.erase(DDI);
// Otherwise, there are cases we have to worry about where we're
@@ -4431,8 +4608,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
return F;
}
- return llvm::ConstantExpr::getBitCast(F,
- Ty->getPointerTo(F->getAddressSpace()));
+ return F;
}
/// GetAddrOfFunction - Return the address of the given function. If Ty is
@@ -4469,7 +4645,7 @@ CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable,
cast<llvm::Function>(F->stripPointerCasts()), GD);
if (IsForDefinition)
return F;
- return llvm::ConstantExpr::getBitCast(Handle, Ty->getPointerTo());
+ return Handle;
}
return F;
}
@@ -4478,9 +4654,7 @@ llvm::Constant *CodeGenModule::GetFunctionStart(const ValueDecl *Decl) {
llvm::GlobalValue *F =
cast<llvm::GlobalValue>(GetAddrOfFunction(Decl)->stripPointerCasts());
- return llvm::ConstantExpr::getBitCast(
- llvm::NoCFIValue::get(F),
- llvm::Type::getInt8PtrTy(VMContext, F->getAddressSpace()));
+ return llvm::NoCFIValue::get(F);
}
static const FunctionDecl *
@@ -4561,27 +4735,6 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name,
return {FTy, C};
}
-/// isTypeConstant - Determine whether an object of this type can be emitted
-/// as a constant.
-///
-/// If ExcludeCtor is true, the duration when the object's constructor runs
-/// will not be considered. The caller will need to verify that the object is
-/// not written to during its construction. ExcludeDtor works similarly.
-bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor,
- bool ExcludeDtor) {
- if (!Ty.isConstant(Context) && !Ty->isReferenceType())
- return false;
-
- if (Context.getLangOpts().CPlusPlus) {
- if (const CXXRecordDecl *Record
- = Context.getBaseElementType(Ty)->getAsCXXRecordDecl())
- return ExcludeCtor && !Record->hasMutableFields() &&
- (Record->hasTrivialDestructor() || ExcludeDtor);
- }
-
- return true;
-}
-
/// GetOrCreateLLVMGlobal - If the specified mangled name is not in the module,
/// create and return an llvm GlobalVariable with the specified type and address
/// space. If there is something in the module with the specified name, return
@@ -4638,15 +4791,14 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty,
}
// Make sure the result is of the correct type.
- if (Entry->getType()->getAddressSpace() != TargetAS) {
- return llvm::ConstantExpr::getAddrSpaceCast(Entry,
- Ty->getPointerTo(TargetAS));
- }
+ if (Entry->getType()->getAddressSpace() != TargetAS)
+ return llvm::ConstantExpr::getAddrSpaceCast(
+ Entry, llvm::PointerType::get(Ty->getContext(), TargetAS));
// (If global is requested for a definition, we always need to create a new
// global, not just return a bitcast.)
if (!IsForDefinition)
- return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo(TargetAS));
+ return Entry;
}
auto DAddrSpace = GetGlobalVarAddressSpace(D);
@@ -4662,9 +4814,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty,
GV->takeName(Entry);
if (!Entry->use_empty()) {
- llvm::Constant *NewPtrForOldDecl =
- llvm::ConstantExpr::getBitCast(GV, Entry->getType());
- Entry->replaceAllUsesWith(NewPtrForOldDecl);
+ Entry->replaceAllUsesWith(GV);
}
Entry->eraseFromParent();
@@ -4678,7 +4828,6 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty,
// Move the potentially referenced deferred decl to the DeferredDeclsToEmit
// list, and remove it from DeferredDecls (since we don't need it anymore).
addDeferredDeclToEmit(DDI->second);
- EmittedDeferredDecls[DDI->first] = DDI->second;
DeferredDecls.erase(DDI);
}
@@ -4689,7 +4838,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty,
// FIXME: This code is overly simple and should be merged with other global
// handling.
- GV->setConstant(isTypeConstant(D->getType(), false, false));
+ GV->setConstant(D->getType().isConstantStorage(getContext(), false, false));
GV->setAlignment(getContext().getDeclAlign(D).getAsAlign());
@@ -4785,7 +4934,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty,
assert(getContext().getTargetAddressSpace(ExpectedAS) == TargetAS);
if (DAddrSpace != ExpectedAS) {
return getTargetCodeGenInfo().performAddrSpaceCast(
- *this, GV, DAddrSpace, ExpectedAS, Ty->getPointerTo(TargetAS));
+ *this, GV, DAddrSpace, ExpectedAS,
+ llvm::PointerType::get(getLLVMContext(), TargetAS));
}
return GV;
@@ -4843,9 +4993,7 @@ llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable(
GV->takeName(OldGV);
if (!OldGV->use_empty()) {
- llvm::Constant *NewPtrForOldDecl =
- llvm::ConstantExpr::getBitCast(GV, OldGV->getType());
- OldGV->replaceAllUsesWith(NewPtrForOldDecl);
+ OldGV->replaceAllUsesWith(GV);
}
OldGV->eraseFromParent();
@@ -4997,7 +5145,8 @@ castStringLiteralToDefaultAddressSpace(CodeGenModule &CGM,
if (AS != LangAS::Default)
Cast = CGM.getTargetCodeGenInfo().performAddrSpaceCast(
CGM, GV, AS, LangAS::Default,
- GV->getValueType()->getPointerTo(
+ llvm::PointerType::get(
+ CGM.getLLVMContext(),
CGM.getContext().getTargetAddressSpace(LangAS::Default)));
}
return Cast;
@@ -5015,7 +5164,7 @@ void CodeGenModule::MaybeHandleStaticInExternC(const SomeDecl *D,
return;
// Must have internal linkage and an ordinary name.
- if (!D->getIdentifier() || D->getFormalLinkage() != InternalLinkage)
+ if (!D->getIdentifier() || D->getFormalLinkage() != Linkage::Internal)
return;
// Must be in an extern "C" context. Entities declared directly within
@@ -5221,8 +5370,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
AddGlobalAnnotations(D, GV);
// Set the llvm linkage type as appropriate.
- llvm::GlobalValue::LinkageTypes Linkage =
- getLLVMLinkageVarDefinition(D, GV->isConstant());
+ llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(D);
// CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
// the device. [...]"
@@ -5250,7 +5398,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
// If it is safe to mark the global 'constant', do so now.
GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor &&
- isTypeConstant(D->getType(), true, true));
+ D->getType().isConstantStorage(getContext(), true, true));
// If it is in a read-only section, mark it 'constant'.
if (const SectionAttr *SA = D->getAttr<SectionAttr>()) {
@@ -5415,8 +5563,9 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context,
return false;
}
-llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
- const DeclaratorDecl *D, GVALinkage Linkage, bool IsConstantVariable) {
+llvm::GlobalValue::LinkageTypes
+CodeGenModule::getLLVMLinkageForDeclarator(const DeclaratorDecl *D,
+ GVALinkage Linkage) {
if (Linkage == GVA_Internal)
return llvm::Function::InternalLinkage;
@@ -5486,10 +5635,10 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
return llvm::GlobalVariable::ExternalLinkage;
}
-llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageVarDefinition(
- const VarDecl *VD, bool IsConstant) {
+llvm::GlobalValue::LinkageTypes
+CodeGenModule::getLLVMLinkageVarDefinition(const VarDecl *VD) {
GVALinkage Linkage = getContext().GetGVALinkageForVariable(VD);
- return getLLVMLinkageForDeclarator(VD, Linkage, IsConstant);
+ return getLLVMLinkageForDeclarator(VD, Linkage);
}
/// Replace the uses of a function that was declared with a non-proto type.
@@ -5663,8 +5812,8 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
AddGlobalCtor(Fn, CA->getPriority());
if (const DestructorAttr *DA = D->getAttr<DestructorAttr>())
AddGlobalDtor(Fn, DA->getPriority(), true);
- if (D->hasAttr<AnnotateAttr>())
- AddGlobalAnnotations(D, Fn);
+ if (getLangOpts().OpenMP && D->hasAttr<OMPDeclareTargetDeclAttr>())
+ getOpenMPRuntime().emitDeclareTargetFunction(D, GV);
}
void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
@@ -5701,7 +5850,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, LangAS::Default,
/*D=*/nullptr);
if (const auto *VD = dyn_cast<VarDecl>(GD.getDecl()))
- LT = getLLVMLinkageVarDefinition(VD, D->getType().isConstQualified());
+ LT = getLLVMLinkageVarDefinition(VD);
else
LT = getFunctionLinkage(GD);
}
@@ -5728,8 +5877,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
// Remove it and replace uses of it with the alias.
GA->takeName(Entry);
- Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GA,
- Entry->getType()));
+ Entry->replaceAllUsesWith(GA);
Entry->eraseFromParent();
} else {
GA->setName(MangledName);
@@ -5807,12 +5955,13 @@ void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) {
// Remove it and replace uses of it with the ifunc.
GIF->takeName(Entry);
- Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GIF,
- Entry->getType()));
+ Entry->replaceAllUsesWith(GIF);
Entry->eraseFromParent();
} else
GIF->setName(MangledName);
-
+ if (auto *F = dyn_cast<llvm::Function>(Resolver)) {
+ F->addFnAttr(llvm::Attribute::DisableSanitizerInstrumentation);
+ }
SetCommonAttributes(GD, GIF);
}
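
An ifunc resolver runs while the loader applies relocations, long before any
sanitizer runtime has initialized, so instrumenting it can only crash; hence
the DisableSanitizerInstrumentation attribute added above. A GNU-ifunc
reproducer:

    static int impl_fast(void) { return 1; }

    // Executes at load time, before main() and before sanitizer setup.
    static int (*resolve_impl(void))(void) { return impl_fast; }

    __attribute__((ifunc("resolve_impl"))) int impl(void);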
@@ -5976,7 +6125,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
}
// Note: -fwritable-strings doesn't make the backing store strings of
- // CFStrings writable. (See <rdar://problem/10657500>)
+ // CFStrings writable.
auto *GV =
new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true,
llvm::GlobalValue::PrivateLinkage, C, ".str");
@@ -6002,9 +6151,6 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
llvm::Constant *Str =
llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros);
- if (isUTF16)
- // Cast the UTF16 string to the correct type.
- Str = llvm::ConstantExpr::getBitCast(Str, Int8PtrTy);
Fields.add(Str);
// String length.
@@ -6062,12 +6208,10 @@ QualType CodeGenModule::getObjCFastEnumerationStateType() {
D->startDefinition();
QualType FieldTypes[] = {
- Context.UnsignedLongTy,
- Context.getPointerType(Context.getObjCIdType()),
- Context.getPointerType(Context.UnsignedLongTy),
- Context.getConstantArrayType(Context.UnsignedLongTy,
- llvm::APInt(32, 5), nullptr, ArrayType::Normal, 0)
- };
+ Context.UnsignedLongTy, Context.getPointerType(Context.getObjCIdType()),
+ Context.getPointerType(Context.UnsignedLongTy),
+ Context.getConstantArrayType(Context.UnsignedLongTy, llvm::APInt(32, 5),
+ nullptr, ArraySizeModifier::Normal, 0)};
for (size_t i = 0; i < 4; ++i) {
FieldDecl *Field = FieldDecl::Create(Context,
@@ -6295,7 +6439,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
VD, E->getManglingNumber(), Out);
APValue *Value = nullptr;
- if (E->getStorageDuration() == SD_Static && VD && VD->evaluateValue()) {
+ if (E->getStorageDuration() == SD_Static && VD->evaluateValue()) {
// If the initializer of the extending declaration is a constant
// initializer, we should have a cached constant initializer for this
// temporary. Note that this might have a different value from the value
@@ -6310,8 +6454,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
!EvalResult.hasSideEffects())
Value = &EvalResult.Val;
- LangAS AddrSpace =
- VD ? GetGlobalVarAddressSpace(VD) : MaterializedType.getAddressSpace();
+ LangAS AddrSpace = GetGlobalVarAddressSpace(VD);
std::optional<ConstantEmitter> emitter;
llvm::Constant *InitialValue = nullptr;
@@ -6322,8 +6465,9 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
emitter.emplace(*this);
InitialValue = emitter->emitForInitializer(*Value, AddrSpace,
MaterializedType);
- Constant = isTypeConstant(MaterializedType, /*ExcludeCtor*/ Value,
- /*ExcludeDtor*/ false);
+ Constant =
+ MaterializedType.isConstantStorage(getContext(), /*ExcludeCtor*/ Value,
+ /*ExcludeDtor*/ false);
Type = InitialValue->getType();
} else {
// No initializer, the initialization will be provided when we
@@ -6332,8 +6476,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
}
// Create a global variable for this lifetime-extended temporary.
- llvm::GlobalValue::LinkageTypes Linkage =
- getLLVMLinkageVarDefinition(VD, Constant);
+ llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(VD);
if (Linkage == llvm::GlobalVariable::ExternalLinkage) {
const VarDecl *InitVD;
if (VD->isStaticDataMember() && VD->getAnyInitializer(InitVD) &&
@@ -6368,15 +6511,15 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary(
if (AddrSpace != LangAS::Default)
CV = getTargetCodeGenInfo().performAddrSpaceCast(
*this, GV, AddrSpace, LangAS::Default,
- Type->getPointerTo(
+ llvm::PointerType::get(
+ getLLVMContext(),
getContext().getTargetAddressSpace(LangAS::Default)));
// Update the map with the new temporary. If we created a placeholder above,
// replace it with the new global now.
llvm::Constant *&Entry = MaterializedGlobalTemporaryMap[E];
if (Entry) {
- Entry->replaceAllUsesWith(
- llvm::ConstantExpr::getBitCast(CV, Entry->getType()));
+ Entry->replaceAllUsesWith(CV);
llvm::cast<llvm::GlobalVariable>(Entry)->eraseFromParent();
}
Entry = CV;
@@ -6446,7 +6589,7 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) {
/*isInstance=*/true, /*isVariadic=*/false,
/*isPropertyAccessor=*/true, /*isSynthesizedAccessorStub=*/false,
/*isImplicitlyDeclared=*/true,
- /*isDefined=*/false, ObjCMethodDecl::Required);
+ /*isDefined=*/false, ObjCImplementationControl::Required);
D->addInstanceMethod(DTORMethod);
CodeGenFunction(*this).GenerateObjCCtorDtorMethod(D, DTORMethod, false);
D->setHasDestructors(true);
@@ -6467,7 +6610,7 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) {
/*isVariadic=*/false,
/*isPropertyAccessor=*/true, /*isSynthesizedAccessorStub=*/false,
/*isImplicitlyDeclared=*/true,
- /*isDefined=*/false, ObjCMethodDecl::Required);
+ /*isDefined=*/false, ObjCImplementationControl::Required);
D->addInstanceMethod(CTORMethod);
CodeGenFunction(*this).GenerateObjCCtorDtorMethod(D, CTORMethod, true);
D->setHasNonZeroConstructors(true);
@@ -6475,8 +6618,8 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) {
// EmitLinkageSpec - Emit all declarations in a linkage spec.
void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) {
- if (LSD->getLanguage() != LinkageSpecDecl::lang_c &&
- LSD->getLanguage() != LinkageSpecDecl::lang_cxx) {
+ if (LSD->getLanguage() != LinkageSpecLanguageIDs::C &&
+ LSD->getLanguage() != LinkageSpecLanguageIDs::CXX) {
ErrorUnsupported(LSD, "linkage spec");
return;
}
@@ -6856,9 +6999,7 @@ void CodeGenModule::AddDeferredUnusedCoverageMapping(Decl *D) {
SourceManager &SM = getContext().getSourceManager();
if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getBeginLoc()))
break;
- auto I = DeferredEmptyCoverageMappingDecls.find(D);
- if (I == DeferredEmptyCoverageMappingDecls.end())
- DeferredEmptyCoverageMappingDecls[D] = true;
+ DeferredEmptyCoverageMappingDecls.try_emplace(D, true);
break;
}
default:
@@ -6874,11 +7015,7 @@ void CodeGenModule::ClearUnusedCoverageMapping(const Decl *D) {
if (Fn->isTemplateInstantiation())
ClearUnusedCoverageMapping(Fn->getTemplateInstantiationPattern());
}
- auto I = DeferredEmptyCoverageMappingDecls.find(D);
- if (I == DeferredEmptyCoverageMappingDecls.end())
- DeferredEmptyCoverageMappingDecls[D] = false;
- else
- I->second = false;
+ DeferredEmptyCoverageMappingDecls.insert_or_assign(D, false);
}
void CodeGenModule::EmitDeferredUnusedCoverageMappings() {
@@ -7194,7 +7331,7 @@ CodeGenModule::CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map,
if (isExternallyVisible(T->getLinkage())) {
std::string OutName;
llvm::raw_string_ostream Out(OutName);
- getCXXABI().getMangleContext().mangleTypeName(
+ getCXXABI().getMangleContext().mangleCanonicalTypeName(
T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers);
if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers)
@@ -7428,7 +7565,7 @@ void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
// Get the UniqueID for the file containing the decl.
llvm::sys::fs::UniqueID ID;
- if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
+ if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
PLoc = SM.getPresumedLoc(D->getLocation(), /*UseLineDirectives=*/false);
assert(PLoc.isValid() && "Source location is expected to be valid.");
if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
@@ -7448,6 +7585,8 @@ void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) {
assert(NewBuilder->DeferredDecls.empty() &&
"Newly created module should not have deferred decls");
NewBuilder->DeferredDecls = std::move(DeferredDecls);
+ assert(EmittedDeferredDecls.empty() &&
+ "Still have (unmerged) EmittedDeferredDecls deferred decls");
assert(NewBuilder->DeferredVTables.empty() &&
"Newly created module should not have deferred vtables");
@@ -7463,10 +7602,5 @@ void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) {
NewBuilder->TBAA = std::move(TBAA);
- assert(NewBuilder->EmittedDeferredDecls.empty() &&
- "Still have (unmerged) EmittedDeferredDecls deferred decls");
-
- NewBuilder->EmittedDeferredDecls = std::move(EmittedDeferredDecls);
-
NewBuilder->ABI->MangleCtx = std::move(ABI->MangleCtx);
}
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 05cb217e2bee..ec34680fd3f7 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -26,6 +26,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/NoSanitizeList.h"
+#include "clang/Basic/ProfileList.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/XRayLists.h"
#include "clang/Lex/PreprocessorOptions.h"
@@ -214,16 +215,14 @@ struct ObjCEntrypoints {
/// This class records statistics on instrumentation based profiling.
class InstrProfStats {
- uint32_t VisitedInMainFile;
- uint32_t MissingInMainFile;
- uint32_t Visited;
- uint32_t Missing;
- uint32_t Mismatched;
+ uint32_t VisitedInMainFile = 0;
+ uint32_t MissingInMainFile = 0;
+ uint32_t Visited = 0;
+ uint32_t Missing = 0;
+ uint32_t Mismatched = 0;
public:
- InstrProfStats()
- : VisitedInMainFile(0), MissingInMainFile(0), Visited(0), Missing(0),
- Mismatched(0) {}
+ InstrProfStats() = default;
/// Record that we've visited a function and whether or not that function was
/// in the main source file.
void addVisited(bool MainFile) {
@@ -361,10 +360,19 @@ private:
llvm::DenseMap<llvm::StringRef, GlobalDecl> EmittedDeferredDecls;
void addEmittedDeferredDecl(GlobalDecl GD) {
- if (!llvm::isa<FunctionDecl>(GD.getDecl()))
+ // Reemission is only needed in incremental mode.
+ if (!Context.getLangOpts().IncrementalExtensions)
return;
- llvm::GlobalVariable::LinkageTypes L = getFunctionLinkage(GD);
- if (llvm::GlobalValue::isLinkOnceLinkage(L) ||
+
+ // Assume a linkage by default that does not need reemission.
+ auto L = llvm::GlobalValue::ExternalLinkage;
+ if (llvm::isa<FunctionDecl>(GD.getDecl()))
+ L = getFunctionLinkage(GD);
+ else if (auto *VD = llvm::dyn_cast<VarDecl>(GD.getDecl()))
+ L = getLLVMLinkageVarDefinition(VD);
+
+ if (llvm::GlobalValue::isInternalLinkage(L) ||
+ llvm::GlobalValue::isLinkOnceLinkage(L) ||
llvm::GlobalValue::isWeakLinkage(L)) {
EmittedDeferredDecls[getMangledName(GD)] = GD;
}
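
addEmittedDeferredDecl now does its bookkeeping only under
IncrementalExtensions (the clang-repl case) and covers variables as well as
functions. The linkage test reduces to a small predicate, restated standalone:

    #include "llvm/IR/GlobalValue.h"

    // Only these linkages can require re-emission when a later incremental
    // input redefines the entity; plain external linkage never does.
    bool mayNeedReemission(llvm::GlobalValue::LinkageTypes L) {
      return llvm::GlobalValue::isInternalLinkage(L) ||
             llvm::GlobalValue::isLinkOnceLinkage(L) ||
             llvm::GlobalValue::isWeakLinkage(L);
    }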
@@ -423,6 +431,10 @@ private:
/// Global annotations.
std::vector<llvm::Constant*> Annotations;
+ // Store deferred function annotations so they can be emitted at the end
+ // with the most up-to-date ValueDecl, which will have all the inherited
+ // annotations.
+ llvm::DenseMap<StringRef, const ValueDecl *> DeferredAnnotations;
+
/// Map used to get unique annotation strings.
llvm::StringMap<llvm::Constant*> AnnotationStrings;
@@ -814,8 +826,6 @@ public:
return getTBAAAccessInfo(AccessType);
}
- bool isTypeConstant(QualType QTy, bool ExcludeCtor, bool ExcludeDtor);
-
bool isPaddedAtomicType(QualType type);
bool isPaddedAtomicType(const AtomicType *type);
@@ -1019,11 +1029,6 @@ public:
/// Return a pointer to a constant CFString object for the given string.
ConstantAddress GetAddrOfConstantCFString(const StringLiteral *Literal);
- /// Return a pointer to a constant NSString object for the given string. Or a
- /// user defined String object as defined via
- /// -fconstant-string-class=class_name option.
- ConstantAddress GetAddrOfConstantString(const StringLiteral *Literal);
-
/// Return a constant array for the given string.
llvm::Constant *GetConstantArrayFromStringLiteral(const StringLiteral *E);
@@ -1259,26 +1264,11 @@ public:
llvm::AttributeList &Attrs, unsigned &CallingConv,
bool AttrOnCallSite, bool IsThunk);
- /// Adds attributes to F according to our CodeGenOptions and LangOptions, as
- /// though we had emitted it ourselves. We remove any attributes on F that
- /// conflict with the attributes we add here.
- ///
- /// This is useful for adding attrs to bitcode modules that you want to link
- /// with but don't control, such as CUDA's libdevice. When linking with such
- /// a bitcode library, you might want to set e.g. its functions'
- /// "unsafe-fp-math" attribute to match the attr of the functions you're
- /// codegen'ing. Otherwise, LLVM will interpret the bitcode module's lack of
- /// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM
- /// will propagate unsafe-fp-math=false up to every transitive caller of a
- /// function in the bitcode library!
- ///
- /// With the exception of fast-math attrs, this will only make the attributes
- /// on the function more conservative. But it's unsafe to call this on a
- /// function which relies on particular fast-math attributes for correctness.
- /// It's up to you to ensure that this is safe.
- void addDefaultFunctionDefinitionAttributes(llvm::Function &F);
- void mergeDefaultFunctionDefinitionAttributes(llvm::Function &F,
- bool WillInternalize);
+ /// Adjust the Memory attribute to ensure that the backend gets the right
+ /// attribute in order to generate the library call or the intrinsic for
+ /// the function named 'Name'.
+ void AdjustMemoryAttribute(StringRef Name, CGCalleeInfo CalleeInfo,
+ llvm::AttributeList &Attrs);
/// Like the overload taking a `Function &`, but intended specifically
/// for frontends that want to build on Clang's target-configuration logic.
@@ -1321,12 +1311,11 @@ public:
/// Returns LLVM linkage for a declarator.
llvm::GlobalValue::LinkageTypes
- getLLVMLinkageForDeclarator(const DeclaratorDecl *D, GVALinkage Linkage,
- bool IsConstantVariable);
+ getLLVMLinkageForDeclarator(const DeclaratorDecl *D, GVALinkage Linkage);
/// Returns LLVM linkage for a declarator.
llvm::GlobalValue::LinkageTypes
- getLLVMLinkageVarDefinition(const VarDecl *VD, bool IsConstant);
+ getLLVMLinkageVarDefinition(const VarDecl *VD);
/// Emit all the global annotations.
void EmitGlobalAnnotations();
@@ -1505,7 +1494,7 @@ public:
///
/// A most-base class of a class C is defined as a recursive base class of C,
/// including C itself, that does not have any bases.
- std::vector<const CXXRecordDecl *>
+ SmallVector<const CXXRecordDecl *, 0>
getMostBaseClasses(const CXXRecordDecl *RD);
/// Get the declaration of std::terminate for the platform.
@@ -1557,6 +1546,41 @@ public:
/// because we'll lose all important information after each repl.
void moveLazyEmissionStates(CodeGenModule *NewBuilder);
+ /// Emit the IR encoding to attach the CUDA launch bounds attribute to \p F.
+ /// If \p MaxThreadsVal is not nullptr, the max threads value is stored in it,
+ /// if a valid one was found.
+ void handleCUDALaunchBoundsAttr(llvm::Function *F,
+ const CUDALaunchBoundsAttr *A,
+ int32_t *MaxThreadsVal = nullptr,
+ int32_t *MinBlocksVal = nullptr,
+ int32_t *MaxClusterRankVal = nullptr);
+
+ /// Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute
+ /// to \p F. Alternatively, the work group size can be taken from a \p
+ /// ReqdWGS. If \p MinThreadsVal is not nullptr, the min threads value is
+ /// stored in it, if a valid one was found. If \p MaxThreadsVal is not
+ /// nullptr, the max threads value is stored in it, if a valid one was found.
+ void handleAMDGPUFlatWorkGroupSizeAttr(
+ llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A,
+ const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr,
+ int32_t *MinThreadsVal = nullptr, int32_t *MaxThreadsVal = nullptr);
+
+ /// Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to \p F.
+ void handleAMDGPUWavesPerEUAttr(llvm::Function *F,
+ const AMDGPUWavesPerEUAttr *A);
+
+ llvm::Constant *
+ GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, LangAS AddrSpace,
+ const VarDecl *D,
+ ForDefinition_t IsForDefinition = NotForDefinition);
+
+ // FIXME: Hardcoding priority here is gross.
+ void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535,
+ unsigned LexOrder = ~0U,
+ llvm::Constant *AssociatedData = nullptr);
+ void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535,
+ bool IsDtorAttrFunc = false);
+
private:
llvm::Constant *GetOrCreateLLVMFunction(
StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,
@@ -1579,11 +1603,6 @@ private:
void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD,
StringRef &CurName);
- llvm::Constant *
- GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, LangAS AddrSpace,
- const VarDecl *D,
- ForDefinition_t IsForDefinition = NotForDefinition);
-
bool GetCPUAndFeaturesAttributes(GlobalDecl GD,
llvm::AttrBuilder &AttrBuilder,
bool SetTargetFeatures = true);
@@ -1633,13 +1652,6 @@ private:
void EmitPointerToInitFunc(const VarDecl *VD, llvm::GlobalVariable *Addr,
llvm::Function *InitFunc, InitSegAttr *ISA);
- // FIXME: Hardcoding priority here is gross.
- void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535,
- unsigned LexOrder = ~0U,
- llvm::Constant *AssociatedData = nullptr);
- void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535,
- bool IsDtorAttrFunc = false);
-
/// EmitCtorList - Generates a global array of functions and priorities using
/// the given list and name. This array will have appending linkage and is
/// suitable for use as a LLVM constructor or destructor array. Clears Fns.
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index b80317529b72..81bf8ea696b1 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -376,9 +376,9 @@ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
/// BreakContinueStack - Keep counts of breaks and continues inside loops.
struct BreakContinue {
- uint64_t BreakCount;
- uint64_t ContinueCount;
- BreakContinue() : BreakCount(0), ContinueCount(0) {}
+ uint64_t BreakCount = 0;
+ uint64_t ContinueCount = 0;
+ BreakContinue() = default;
};
SmallVector<BreakContinue, 8> BreakContinueStack;
@@ -755,7 +755,8 @@ void PGOHash::combine(HashType Type) {
// Pass through MD5 if enough work has built up.
if (Count && Count % NumTypesPerWord == 0) {
using namespace llvm::support;
- uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
+ uint64_t Swapped =
+ endian::byte_swap<uint64_t, llvm::endianness::little>(Working);
MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
Working = 0;
}
@@ -781,7 +782,8 @@ uint64_t PGOHash::finalize() {
MD5.update({(uint8_t)Working});
} else {
using namespace llvm::support;
- uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
+ uint64_t Swapped =
+ endian::byte_swap<uint64_t, llvm::endianness::little>(Working);
MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
}
}
@@ -952,15 +954,12 @@ CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
llvm::Value *StepV) {
- if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
- return;
- if (!Builder.GetInsertBlock())
+ if (!RegionCounterMap || !Builder.GetInsertBlock())
return;
unsigned Counter = (*RegionCounterMap)[S];
- auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
- llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
+ llvm::Value *Args[] = {FuncNameVar,
Builder.getInt64(FunctionHash),
Builder.getInt32(NumRegionCounters),
Builder.getInt32(Counter), StepV};
@@ -998,7 +997,7 @@ void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
auto BuilderInsertPoint = Builder.saveIP();
Builder.SetInsertPoint(ValueSite);
llvm::Value *Args[5] = {
- llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
+ FuncNameVar,
Builder.getInt64(FunctionHash),
Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
Builder.getInt32(ValueKind),
diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 395ed7b1d703..dc288bc3f615 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -196,16 +196,19 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
// Enum types are distinct types. In C++ they have "underlying types",
// however they aren't related for TBAA.
if (const EnumType *ETy = dyn_cast<EnumType>(Ty)) {
+ if (!Features.CPlusPlus)
+ return getTypeInfo(ETy->getDecl()->getIntegerType());
+
// In C++ mode, types have linkage, so we can rely on the ODR and
// on their mangled names, if they're external.
// TODO: Is there a way to get a program-wide unique name for a
// decl with local linkage or no linkage?
- if (!Features.CPlusPlus || !ETy->getDecl()->isExternallyVisible())
+ if (!ETy->getDecl()->isExternallyVisible())
return getChar();
SmallString<256> OutName;
llvm::raw_svector_ostream Out(OutName);
- MContext.mangleTypeName(QualType(ETy, 0), Out);
+ MContext.mangleCanonicalTypeName(QualType(ETy, 0), Out);
return createScalarTypeNode(OutName, getChar(), Size);
}
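
With the early return above, a C enum now carries the TBAA node of its
compatible integer type instead of the catch-all char node, so stores through
unrelated pointer types can finally be disambiguated:

    enum Color { RED, GREEN, BLUE };

    int probe(enum Color *c, float *f) {
      *c = GREEN;  // now tagged with the enum's underlying integer type,
      *f = 1.0f;   // so this store provably does not alias *c and
      return *c;   // the load can fold to GREEN under strict aliasing
    }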
@@ -342,7 +345,7 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
// field. Virtual bases are more complex and omitted, but avoid an
// incomplete view for NewStructPathTBAA.
if (CodeGenOpts.NewStructPathTBAA && CXXRD->getNumVBases() != 0)
- return BaseTypeMetadataCache[Ty] = nullptr;
+ return nullptr;
for (const CXXBaseSpecifier &B : CXXRD->bases()) {
if (B.isVirtual())
continue;
@@ -354,7 +357,7 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
? getBaseTypeInfo(BaseQTy)
: getTypeInfo(BaseQTy);
if (!TypeNode)
- return BaseTypeMetadataCache[Ty] = nullptr;
+ return nullptr;
uint64_t Offset = Layout.getBaseClassOffset(BaseRD).getQuantity();
uint64_t Size =
Context.getASTRecordLayout(BaseRD).getDataSize().getQuantity();
@@ -378,7 +381,7 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
llvm::MDNode *TypeNode = isValidBaseType(FieldQTy) ?
getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy);
if (!TypeNode)
- return BaseTypeMetadataCache[Ty] = nullptr;
+ return nullptr;
uint64_t BitOffset = Layout.getFieldOffset(Field->getFieldIndex());
uint64_t Offset = Context.toCharUnitsFromBits(BitOffset).getQuantity();
@@ -391,7 +394,7 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
if (Features.CPlusPlus) {
// Don't use the mangler for C code.
llvm::raw_svector_ostream Out(OutName);
- MContext.mangleTypeName(QualType(Ty, 0), Out);
+ MContext.mangleCanonicalTypeName(QualType(Ty, 0), Out);
} else {
OutName = RD->getName();
}
@@ -418,14 +421,20 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) {
return nullptr;
const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
- if (llvm::MDNode *N = BaseTypeMetadataCache[Ty])
- return N;
- // Note that the following helper call is allowed to add new nodes to the
- // cache, which invalidates all its previously obtained iterators. So we
- // first generate the node for the type and then add that node to the cache.
+ // nullptr is a valid value in the cache, so use find rather than []
+ auto I = BaseTypeMetadataCache.find(Ty);
+ if (I != BaseTypeMetadataCache.end())
+ return I->second;
+
+ // First calculate the metadata, before recomputing the insertion point, as
+ // the helper can recursively call us.
llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty);
- return BaseTypeMetadataCache[Ty] = TypeNode;
+ LLVM_ATTRIBUTE_UNUSED auto inserted =
+ BaseTypeMetadataCache.insert({Ty, TypeNode});
+ assert(inserted.second && "BaseType metadata was already inserted");
+
+ return TypeNode;
}
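
The rewritten lookup is the standard pattern for a cache whose values may
legitimately be null and whose compute step can recurse back into the cache:
find first, compute without holding a reference, insert exactly once. In
miniature (std::unordered_map standing in for the DenseMap):

    #include <cassert>
    #include <unordered_map>

    struct Node {};
    std::unordered_map<int, Node *> Cache;

    Node *compute(int Key) { return Key % 2 ? new Node() : nullptr; }

    Node *lookup(int Key) {
      // find(), not operator[]: nullptr is a valid cached result, and []
      // hands out a reference that a recursive insert could invalidate.
      if (auto It = Cache.find(Key); It != Cache.end())
        return It->second;
      Node *N = compute(Key);  // may itself populate other cache entries
      auto Ins = Cache.insert({Key, N});
      assert(Ins.second && "value computed twice");
      (void)Ins;
      return N;
    }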
llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) {
diff --git a/clang/lib/CodeGen/CodeGenTypeCache.h b/clang/lib/CodeGen/CodeGenTypeCache.h
index e848dc3b449c..083d69214fb3 100644
--- a/clang/lib/CodeGen/CodeGenTypeCache.h
+++ b/clang/lib/CodeGen/CodeGenTypeCache.h
@@ -51,14 +51,11 @@ struct CodeGenTypeCache {
llvm::IntegerType *PtrDiffTy;
};
- /// void* in address space 0
+ /// void*, void** in address space 0
union {
+ llvm::PointerType *UnqualPtrTy;
llvm::PointerType *VoidPtrTy;
llvm::PointerType *Int8PtrTy;
- };
-
- /// void** in address space 0
- union {
llvm::PointerType *VoidPtrPtrTy;
llvm::PointerType *Int8PtrPtrTy;
};
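
The two unions could merge because opaque pointers erase pointee types:
void*, char*, and void** are all the single 'ptr' type of their address
space, which the new UnqualPtrTy member names directly. Demonstrably:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    void demo() {
      llvm::LLVMContext Ctx;
      // Both calls return the same uniqued instance: one opaque pointer
      // type exists per (context, address space) pair.
      llvm::PointerType *A = llvm::PointerType::getUnqual(Ctx);
      llvm::PointerType *B = llvm::PointerType::get(Ctx, /*AddressSpace=*/0);
      (void)(A == B);  // always true
    }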
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index 30021794a0bb..a6b51bfef876 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -34,6 +34,7 @@ CodeGenTypes::CodeGenTypes(CodeGenModule &cgm)
Target(cgm.getTarget()), TheCXXABI(cgm.getCXXABI()),
TheABIInfo(cgm.getTargetCodeGenInfo().getABIInfo()) {
SkippedLayout = false;
+ LongDoubleReferenced = false;
}
CodeGenTypes::~CodeGenTypes() {
@@ -406,10 +407,12 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
Context.getLangOpts().NativeHalfType ||
!Context.getTargetInfo().useFP16ConversionIntrinsics());
break;
+ case BuiltinType::LongDouble:
+ LongDoubleReferenced = true;
+ LLVM_FALLTHROUGH;
case BuiltinType::BFloat16:
case BuiltinType::Float:
case BuiltinType::Double:
- case BuiltinType::LongDouble:
case BuiltinType::Float128:
case BuiltinType::Ibm128:
ResultType = getTypeForFormat(getLLVMContext(),
@@ -419,7 +422,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
case BuiltinType::NullPtr:
// Model std::nullptr_t as i8*
- ResultType = llvm::Type::getInt8PtrTy(getLLVMContext());
+ ResultType = llvm::PointerType::getUnqual(getLLVMContext());
break;
case BuiltinType::UInt128:
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index 9088f77b95c3..01c0c673795c 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -84,6 +84,9 @@ class CodeGenTypes {
/// a recursive struct conversion, set this to true.
bool SkippedLayout;
+ /// True if any instance of a long double type is used.
+ bool LongDoubleReferenced;
+
/// This map keeps cache of llvm::Types and maps clang::Type to
/// corresponding llvm::Type.
llvm::DenseMap<const Type *, llvm::Type *> TypeCache;
@@ -252,13 +255,11 @@ public:
/// this.
///
/// \param argTypes - must all actually be canonical as params
- const CGFunctionInfo &arrangeLLVMFunctionInfo(CanQualType returnType,
- bool instanceMethod,
- bool chainCall,
- ArrayRef<CanQualType> argTypes,
- FunctionType::ExtInfo info,
- ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
- RequiredArgs args);
+ const CGFunctionInfo &arrangeLLVMFunctionInfo(
+ CanQualType returnType, FnInfoOpts opts, ArrayRef<CanQualType> argTypes,
+ FunctionType::ExtInfo info,
+ ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
+ RequiredArgs args);
/// Compute a new LLVM record layout object for the given record.
std::unique_ptr<CGRecordLayout> ComputeRecordLayout(const RecordDecl *D,
@@ -291,6 +292,7 @@ public: // These are internal details of CGT that shouldn't be used externally.
/// zero-initialized (in the C++ sense) with an LLVM zeroinitializer.
bool isZeroInitializable(const RecordDecl *RD);
+ bool isLongDoubleReferenced() const { return LongDoubleReferenced; }
bool isRecordLayoutComplete(const Type *Ty) const;
unsigned getTargetAddressSpace(QualType T) const;
};
diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp
index bb4c6f5e0cde..b16358ee117a 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -322,12 +322,12 @@ public:
for (const auto &FL : FileLocs) {
SourceLocation Loc = FL.first;
FileID SpellingFile = SM.getDecomposedSpellingLoc(Loc).first;
- auto Entry = SM.getFileEntryForID(SpellingFile);
+ auto Entry = SM.getFileEntryRefForID(SpellingFile);
if (!Entry)
continue;
FileIDMapping[SM.getFileID(Loc)] = std::make_pair(Mapping.size(), Loc);
- Mapping.push_back(CVM.getFileID(Entry));
+ Mapping.push_back(CVM.getFileID(*Entry));
}
}
@@ -1032,11 +1032,21 @@ struct CounterCoverageMappingBuilder
// lexer may not be able to report back precise token end locations for
// these children nodes (llvm.org/PR39822), and moreover users will not be
// able to see coverage for them.
+ Counter BodyCounter = getRegionCounter(Body);
bool Defaulted = false;
if (auto *Method = dyn_cast<CXXMethodDecl>(D))
Defaulted = Method->isDefaulted();
+ if (auto *Ctor = dyn_cast<CXXConstructorDecl>(D)) {
+ for (auto *Initializer : Ctor->inits()) {
+ if (Initializer->isWritten()) {
+ auto *Init = Initializer->getInit();
+ if (getStart(Init).isValid() && getEnd(Init).isValid())
+ propagateCounts(BodyCounter, Init);
+ }
+ }
+ }
- propagateCounts(getRegionCounter(Body), Body,
+ propagateCounts(BodyCounter, Body,
/*VisitChildren=*/!Defaulted);
assert(RegionStack.empty() && "Regions entered but never exited");
}
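
With the constructor-initializer loop above, written member initializers get
coverage regions of their own, seeded from the body's counter:

    struct S {
      int a, b;
      // Both initializer expressions are now mapped (previously only the
      // empty body produced a region).
      S(int x) : a(x + 1), b(2 * x) {}
    };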
@@ -1718,13 +1728,11 @@ void CoverageMappingModuleGen::emitFunctionMappingRecord(
void CoverageMappingModuleGen::addFunctionMappingRecord(
llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash,
const std::string &CoverageMapping, bool IsUsed) {
- llvm::LLVMContext &Ctx = CGM.getLLVMContext();
const uint64_t NameHash = llvm::IndexedInstrProf::ComputeHash(NameValue);
FunctionRecords.push_back({NameHash, FuncHash, CoverageMapping, IsUsed});
if (!IsUsed)
- FunctionNames.push_back(
- llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx)));
+ FunctionNames.push_back(NamePtr);
if (CGM.getCodeGenOpts().DumpCoverageMapping) {
// Dump the coverage mapping data for this function by decoding the
@@ -1740,7 +1748,7 @@ void CoverageMappingModuleGen::addFunctionMappingRecord(
FilenameStrs[0] = normalizeFilename(getCurrentDirname());
for (const auto &Entry : FileEntries) {
auto I = Entry.second;
- FilenameStrs[I] = normalizeFilename(Entry.first->getName());
+ FilenameStrs[I] = normalizeFilename(Entry.first.getName());
}
ArrayRef<std::string> FilenameRefs = llvm::ArrayRef(FilenameStrs);
RawCoverageMappingReader Reader(CoverageMapping, FilenameRefs, Filenames,
@@ -1764,7 +1772,7 @@ void CoverageMappingModuleGen::emit() {
FilenameStrs[0] = normalizeFilename(getCurrentDirname());
for (const auto &Entry : FileEntries) {
auto I = Entry.second;
- FilenameStrs[I] = normalizeFilename(Entry.first->getName());
+ FilenameStrs[I] = normalizeFilename(Entry.first.getName());
}
std::string Filenames;
@@ -1812,7 +1820,7 @@ void CoverageMappingModuleGen::emit() {
CGM.addUsedGlobal(CovData);
// Create the deferred function records array
if (!FunctionNames.empty()) {
- auto NamesArrTy = llvm::ArrayType::get(llvm::Type::getInt8PtrTy(Ctx),
+ auto NamesArrTy = llvm::ArrayType::get(llvm::PointerType::getUnqual(Ctx),
FunctionNames.size());
auto NamesArrVal = llvm::ConstantArray::get(NamesArrTy, FunctionNames);
// This variable will *NOT* be emitted to the object file. It is used
@@ -1823,7 +1831,7 @@ void CoverageMappingModuleGen::emit() {
}
}
-unsigned CoverageMappingModuleGen::getFileID(const FileEntry *File) {
+unsigned CoverageMappingModuleGen::getFileID(FileEntryRef File) {
auto It = FileEntries.find(File);
if (It != FileEntries.end())
return It->second;
diff --git a/clang/lib/CodeGen/CoverageMappingGen.h b/clang/lib/CodeGen/CoverageMappingGen.h
index eca68d9abd79..77d7c6cd87cf 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.h
+++ b/clang/lib/CodeGen/CoverageMappingGen.h
@@ -104,7 +104,7 @@ class CoverageMappingModuleGen {
CodeGenModule &CGM;
CoverageSourceInfo &SourceInfo;
- llvm::SmallDenseMap<const FileEntry *, unsigned, 8> FileEntries;
+ llvm::SmallDenseMap<FileEntryRef, unsigned, 8> FileEntries;
std::vector<llvm::Constant *> FunctionNames;
std::vector<FunctionInfo> FunctionRecords;
@@ -137,7 +137,7 @@ public:
/// Return the coverage mapping translation unit file id
/// for the given file.
- unsigned getFileID(const FileEntry *File);
+ unsigned getFileID(FileEntryRef File);
/// Return an interface into CodeGenModule.
CodeGenModule &getCodeGenModule() { return CGM; }
diff --git a/clang/lib/CodeGen/EHScopeStack.h b/clang/lib/CodeGen/EHScopeStack.h
index 3c8a51590d1b..0c667e80bb6d 100644
--- a/clang/lib/CodeGen/EHScopeStack.h
+++ b/clang/lib/CodeGen/EHScopeStack.h
@@ -166,10 +166,10 @@ public:
F_IsEHCleanupKind = 0x4,
F_HasExitSwitch = 0x8,
};
- unsigned flags;
+ unsigned flags = 0;
public:
- Flags() : flags(0) {}
+ Flags() = default;
/// isForEH - true if the current emission is for an EH cleanup.
bool isForEHCleanup() const { return flags & F_IsForEH; }
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 79a926cb9edd..d173806ec8ce 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -647,9 +647,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
// Apply the adjustment and cast back to the original struct type
// for consistency.
llvm::Value *This = ThisAddr.getPointer();
- llvm::Value *Ptr = Builder.CreateBitCast(This, Builder.getInt8PtrTy());
- Ptr = Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Ptr, Adj);
- This = Builder.CreateBitCast(Ptr, This->getType(), "this.adjusted");
+ This = Builder.CreateInBoundsGEP(Builder.getInt8Ty(), This, Adj);
ThisPtrForCall = This;
// Load the function pointer.
@@ -740,9 +738,8 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
? llvm::Intrinsic::type_test
: llvm::Intrinsic::public_type_test;
- CheckResult = Builder.CreateCall(
- CGM.getIntrinsic(IID),
- {Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId});
+ CheckResult =
+ Builder.CreateCall(CGM.getIntrinsic(IID), {VFPAddr, TypeId});
}
if (CGM.getItaniumVTableContext().isRelativeLayout()) {
@@ -753,9 +750,9 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
} else {
llvm::Value *VFPAddr =
CGF.Builder.CreateGEP(CGF.Int8Ty, VTable, VTableOffset);
- VirtualFn = CGF.Builder.CreateAlignedLoad(
- llvm::PointerType::getUnqual(CGF.getLLVMContext()), VFPAddr,
- CGF.getPointerAlign(), "memptr.virtualfn");
+ VirtualFn = CGF.Builder.CreateAlignedLoad(CGF.UnqualPtrTy, VFPAddr,
+ CGF.getPointerAlign(),
+ "memptr.virtualfn");
}
}
assert(VirtualFn && "Virtual function pointer not created!");
@@ -795,9 +792,8 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
// In the non-virtual path, the function pointer is actually a
// function pointer.
CGF.EmitBlock(FnNonVirtual);
- llvm::Value *NonVirtualFn = Builder.CreateIntToPtr(
- FnAsInt, llvm::PointerType::getUnqual(CGF.getLLVMContext()),
- "memptr.nonvirtualfn");
+ llvm::Value *NonVirtualFn =
+ Builder.CreateIntToPtr(FnAsInt, CGF.UnqualPtrTy, "memptr.nonvirtualfn");
// Check the function pointer if CFI on member function pointers is enabled.
if (ShouldEmitCFICheck) {
@@ -812,8 +808,6 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
};
llvm::Value *Bit = Builder.getFalse();
- llvm::Value *CastedNonVirtualFn =
- Builder.CreateBitCast(NonVirtualFn, CGF.Int8PtrTy);
for (const CXXRecordDecl *Base : CGM.getMostBaseClasses(RD)) {
llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(
getContext().getMemberPointerType(
@@ -824,13 +818,13 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
llvm::Value *TypeTest =
Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::type_test),
- {CastedNonVirtualFn, TypeId});
+ {NonVirtualFn, TypeId});
Bit = Builder.CreateOr(Bit, TypeTest);
}
CGF.EmitCheck(std::make_pair(Bit, SanitizerKind::CFIMFCall),
SanitizerHandler::CFICheckFail, StaticData,
- {CastedNonVirtualFn, llvm::UndefValue::get(CGF.IntPtrTy)});
+ {NonVirtualFn, llvm::UndefValue::get(CGF.IntPtrTy)});
FnNonVirtual = Builder.GetInsertBlock();
}
@@ -838,8 +832,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
// We're done.
CGF.EmitBlock(FnEnd);
- llvm::PHINode *CalleePtr =
- Builder.CreatePHI(llvm::PointerType::getUnqual(CGF.getLLVMContext()), 2);
+ llvm::PHINode *CalleePtr = Builder.CreatePHI(CGF.UnqualPtrTy, 2);
CalleePtr->addIncoming(VirtualFn, FnVirtual);
CalleePtr->addIncoming(NonVirtualFn, FnNonVirtual);
@@ -1243,8 +1236,7 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
// Grab the vtable pointer as an intptr_t*.
auto *ClassDecl =
cast<CXXRecordDecl>(ElementType->castAs<RecordType>()->getDecl());
- llvm::Value *VTable = CGF.GetVTablePtr(
- Ptr, llvm::PointerType::getUnqual(CGF.getLLVMContext()), ClassDecl);
+ llvm::Value *VTable = CGF.GetVTablePtr(Ptr, CGF.UnqualPtrTy, ClassDecl);
// Track back to entry -2 and pull out the offset there.
llvm::Value *OffsetPtr = CGF.Builder.CreateConstInBoundsGEP1_64(
@@ -1253,8 +1245,7 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
CGF.getPointerAlign());
// Apply the offset.
- llvm::Value *CompletePtr =
- CGF.Builder.CreateBitCast(Ptr.getPointer(), CGF.Int8PtrTy);
+ llvm::Value *CompletePtr = Ptr.getPointer();
CompletePtr =
CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, CompletePtr, Offset);
@@ -1333,7 +1324,6 @@ void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) {
if (!Record->hasTrivialDestructor()) {
CXXDestructorDecl *DtorD = Record->getDestructor();
Dtor = CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete));
- Dtor = llvm::ConstantExpr::getBitCast(Dtor, CGM.Int8PtrTy);
}
}
if (!Dtor) Dtor = llvm::Constant::getNullValue(CGM.Int8PtrTy);
@@ -1344,15 +1334,16 @@ void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) {
static llvm::FunctionCallee getItaniumDynamicCastFn(CodeGenFunction &CGF) {
// void *__dynamic_cast(const void *sub,
- // const abi::__class_type_info *src,
- // const abi::__class_type_info *dst,
+ // global_as const abi::__class_type_info *src,
+ // global_as const abi::__class_type_info *dst,
// std::ptrdiff_t src2dst_offset);
llvm::Type *Int8PtrTy = CGF.Int8PtrTy;
+ llvm::Type *GlobInt8PtrTy = CGF.GlobalsInt8PtrTy;
llvm::Type *PtrDiffTy =
CGF.ConvertType(CGF.getContext().getPointerDiffType());
- llvm::Type *Args[4] = { Int8PtrTy, Int8PtrTy, Int8PtrTy, PtrDiffTy };
+ llvm::Type *Args[4] = { Int8PtrTy, GlobInt8PtrTy, GlobInt8PtrTy, PtrDiffTy };
llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, Args, false);
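A minimal use that lowers to this runtime entry point (illustrative only; the two type_info operands are the pointers that now carry the target's global address space):

    struct Base { virtual ~Base() {} };
    struct Derived : Base {};

    Derived *downcast(Base *B) {
      // Emitted as __dynamic_cast(B, &typeid(Base), &typeid(Derived), offset).
      return dynamic_cast<Derived *>(B);
    }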
@@ -1449,12 +1440,11 @@ llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF,
llvm::Type *StdTypeInfoPtrTy) {
auto *ClassDecl =
cast<CXXRecordDecl>(SrcRecordTy->castAs<RecordType>()->getDecl());
- llvm::Value *Value = CGF.GetVTablePtr(
- ThisPtr, llvm::PointerType::getUnqual(CGF.getLLVMContext()), ClassDecl);
+ llvm::Value *Value = CGF.GetVTablePtr(ThisPtr, CGM.GlobalsInt8PtrTy,
+ ClassDecl);
if (CGM.getItaniumVTableContext().isRelativeLayout()) {
// Load the type info.
- Value = CGF.Builder.CreateBitCast(Value, CGM.Int8PtrTy);
Value = CGF.Builder.CreateCall(
CGM.getIntrinsic(llvm::Intrinsic::load_relative, {CGM.Int32Ty}),
{Value, llvm::ConstantInt::get(CGM.Int32Ty, -4)});
@@ -1597,9 +1587,8 @@ llvm::Value *ItaniumCXXABI::emitDynamicCastToVoid(CodeGenFunction &CGF,
llvm::Value *OffsetToTop;
if (CGM.getItaniumVTableContext().isRelativeLayout()) {
// Get the vtable pointer.
- llvm::Value *VTable = CGF.GetVTablePtr(
- ThisAddr, llvm::PointerType::getUnqual(CGF.getLLVMContext()),
- ClassDecl);
+ llvm::Value *VTable =
+ CGF.GetVTablePtr(ThisAddr, CGF.UnqualPtrTy, ClassDecl);
// Get the offset-to-top from the vtable.
OffsetToTop =
@@ -1611,9 +1600,8 @@ llvm::Value *ItaniumCXXABI::emitDynamicCastToVoid(CodeGenFunction &CGF,
CGF.ConvertType(CGF.getContext().getPointerDiffType());
// Get the vtable pointer.
- llvm::Value *VTable = CGF.GetVTablePtr(
- ThisAddr, llvm::PointerType::getUnqual(CGF.getLLVMContext()),
- ClassDecl);
+ llvm::Value *VTable =
+ CGF.GetVTablePtr(ThisAddr, CGF.UnqualPtrTy, ClassDecl);
// Get the offset-to-top from the vtable.
OffsetToTop =
@@ -1729,7 +1717,7 @@ void ItaniumCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
QualType T = Context.getPointerType(Q);
auto *VTTDecl = ImplicitParamDecl::Create(
Context, /*DC=*/nullptr, MD->getLocation(), &Context.Idents.get("vtt"),
- T, ImplicitParamDecl::CXXVTT);
+ T, ImplicitParamKind::CXXVTT);
Params.insert(Params.begin() + 1, VTTDecl);
getStructorImplicitParamDecl(CGF) = VTTDecl;
}
@@ -2211,8 +2199,7 @@ static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF,
NonVirtualAdjustment);
}
- // Cast back to the original type.
- return CGF.Builder.CreateBitCast(ResultPtr, InitialPtr.getType());
+ return ResultPtr;
}
llvm::Value *ItaniumCXXABI::performThisAdjustment(CodeGenFunction &CGF,
@@ -2315,8 +2302,8 @@ llvm::Value *ItaniumCXXABI::readArrayCookieImpl(CodeGenFunction &CGF,
// cookie, otherwise return 0 to avoid an infinite loop calling DTORs.
// We can't simply ignore this load using nosanitize metadata because
// the metadata may be lost.
- llvm::FunctionType *FTy = llvm::FunctionType::get(
- CGF.SizeTy, llvm::PointerType::getUnqual(CGF.getLLVMContext()), false);
+ llvm::FunctionType *FTy =
+ llvm::FunctionType::get(CGF.SizeTy, CGF.UnqualPtrTy, false);
llvm::FunctionCallee F =
CGM.CreateRuntimeFunction(FTy, "__asan_load_cxx_array_cookie");
return CGF.Builder.CreateCall(F, numElementsPtr.getPointer());
@@ -2659,7 +2646,7 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF,
// We're assuming that the destructor function is something we can
// reasonably call with the default CC.
- llvm::Type *dtorTy = llvm::PointerType::getUnqual(CGF.getLLVMContext());
+ llvm::Type *dtorTy = CGF.UnqualPtrTy;
// Preserve address space of addr.
auto AddrAS = addr ? addr->getType()->getPointerAddressSpace() : 0;
@@ -2807,6 +2794,14 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
if (D.isNoDestroy(CGM.getContext()))
return;
+ // OpenMP offloading supports C++ constructors and destructors but we do not
+ // always have 'atexit' available. Instead lower these to use the LLVM global
+ // destructors which we can handle directly in the runtime. Note that this is
+ // not strictly 1-to-1 with using `atexit` because we no longer tear down
+ // globals in reverse order of when they were constructed.
+ if (!CGM.getLangOpts().hasAtExit() && !D.isStaticLocal())
+ return CGF.registerGlobalDtorWithLLVM(D, dtor, addr);
+
// emitGlobalDtorWithCXAAtExit will emit a call to either __cxa_thread_atexit
// or __cxa_atexit depending on whether this VarDecl is a thread-local storage
// or not. CXAAtExit controls only __cxa_atexit, so use it if it is enabled.
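A sketch of the case the new early return covers, assuming an offload target without atexit (illustrative only):

    struct Logger {
      ~Logger() {} // user-provided, non-trivial destructor
    };
    // Not a static local, so without atexit its destructor is registered
    // through llvm.global_dtors instead of __cxa_atexit.
    Logger GlobalLogger;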
@@ -2839,7 +2834,7 @@ static bool isThreadWrapperReplaceable(const VarDecl *VD,
static llvm::GlobalValue::LinkageTypes
getThreadLocalWrapperLinkage(const VarDecl *VD, CodeGen::CodeGenModule &CGM) {
llvm::GlobalValue::LinkageTypes VarLinkage =
- CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
+ CGM.getLLVMLinkageVarDefinition(VD);
// For internal linkage variables, we don't need an external or weak wrapper.
if (llvm::GlobalValue::isLocalLinkage(VarLinkage))
@@ -3100,9 +3095,6 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
CharUnits Align = CGM.getContext().getDeclAlign(VD);
Val = Builder.CreateAlignedLoad(Var->getValueType(), Val, Align);
}
- if (Val->getType() != Wrapper->getReturnType())
- Val = Builder.CreatePointerBitCastOrAddrSpaceCast(
- Val, Wrapper->getReturnType(), "");
Builder.CreateRet(Val);
}
@@ -3676,9 +3668,10 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
// Check if the alias exists. If it doesn't, then get or create the global.
if (CGM.getItaniumVTableContext().isRelativeLayout())
VTable = CGM.getModule().getNamedAlias(VTableName);
- if (!VTable)
- VTable =
- CGM.getModule().getOrInsertGlobal(VTableName, CGM.GlobalsInt8PtrTy);
+ if (!VTable) {
+ llvm::Type *Ty = llvm::ArrayType::get(CGM.GlobalsInt8PtrTy, 0);
+ VTable = CGM.getModule().getOrInsertGlobal(VTableName, Ty);
+ }
CGM.setDSOLocal(cast<llvm::GlobalValue>(VTable->stripPointerCasts()));
@@ -3718,14 +3711,17 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM,
return llvm::GlobalValue::InternalLinkage;
switch (Ty->getLinkage()) {
- case NoLinkage:
- case InternalLinkage:
- case UniqueExternalLinkage:
+ case Linkage::Invalid:
+ llvm_unreachable("Linkage hasn't been computed!");
+
+ case Linkage::None:
+ case Linkage::Internal:
+ case Linkage::UniqueExternal:
return llvm::GlobalValue::InternalLinkage;
- case VisibleNoLinkage:
- case ModuleLinkage:
- case ExternalLinkage:
+ case Linkage::VisibleNone:
+ case Linkage::Module:
+ case Linkage::External:
// RTTI is not enabled, which means that this type info struct is going
// to be used for exception handling. Give it linkonce_odr linkage.
if (!CGM.getLangOpts().RTTI)
@@ -3950,9 +3946,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
// If there's already an old global variable, replace it with the new one.
if (OldGV) {
GV->takeName(OldGV);
- llvm::Constant *NewPtr =
- llvm::ConstantExpr::getBitCast(GV, OldGV->getType());
- OldGV->replaceAllUsesWith(NewPtr);
+ OldGV->replaceAllUsesWith(GV);
OldGV->eraseFromParent();
}
@@ -4519,7 +4513,9 @@ namespace {
}
/// Emits a call to __cxa_begin_catch and enters a cleanup to call
-/// __cxa_end_catch.
+/// __cxa_end_catch. If -fassume-nothrow-exception-dtor is specified, we assume
+/// that the exception object's dtor is nothrow, so the __cxa_end_catch call
+/// can be marked nounwind even if EndMightThrow is true.
///
/// \param EndMightThrow - true if __cxa_end_catch might throw
static llvm::Value *CallBeginCatch(CodeGenFunction &CGF,
@@ -4528,7 +4524,9 @@ static llvm::Value *CallBeginCatch(CodeGenFunction &CGF,
llvm::CallInst *call =
CGF.EmitNounwindRuntimeCall(getBeginCatchFn(CGF.CGM), Exn);
- CGF.EHStack.pushCleanup<CallEndCatch>(NormalAndEHCleanup, EndMightThrow);
+ CGF.EHStack.pushCleanup<CallEndCatch>(
+ NormalAndEHCleanup,
+ EndMightThrow && !CGF.CGM.getLangOpts().AssumeNothrowExceptionDtor);
return call;
}
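For illustration, a handler where this matters, assuming -fassume-nothrow-exception-dtor is in effect (sketch only):

    struct E {
      ~E() noexcept(false) {}
    };
    void handler() {
      try {
        throw E{};
      } catch (E) {
        // The caught copy is destroyed inside __cxa_end_catch; under the
        // flag that call is emitted nounwind despite the throwing dtor.
      }
    }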
@@ -4660,8 +4658,7 @@ static void InitCatchParam(CodeGenFunction &CGF,
auto catchRD = CatchType->getAsCXXRecordDecl();
CharUnits caughtExnAlignment = CGF.CGM.getClassPointerAlignment(catchRD);
- llvm::Type *PtrTy =
- llvm::PointerType::getUnqual(CGF.getLLVMContext()); // addrspace 0 ok
+ llvm::Type *PtrTy = CGF.UnqualPtrTy; // addrspace 0 ok
// Check for a copy expression. If we don't have a copy expression,
// that means a trivial copy is okay.
@@ -4849,8 +4846,7 @@ void XLCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
llvm::FunctionCallee Dtor,
llvm::Constant *Addr) {
if (D.getTLSKind() != VarDecl::TLS_None) {
- llvm::PointerType *PtrTy =
- llvm::PointerType::getUnqual(CGF.getLLVMContext());
+ llvm::PointerType *PtrTy = CGF.UnqualPtrTy;
// extern "C" int __pt_atexit_np(int flags, int(*)(int,...), ...);
llvm::FunctionType *AtExitTy =
diff --git a/clang/lib/CodeGen/LinkInModulesPass.cpp b/clang/lib/CodeGen/LinkInModulesPass.cpp
new file mode 100644
index 000000000000..6ce2b94c1db8
--- /dev/null
+++ b/clang/lib/CodeGen/LinkInModulesPass.cpp
@@ -0,0 +1,29 @@
+//===-- LinkInModulesPass.cpp - Module Linking pass -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// LinkInModulesPass implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "LinkInModulesPass.h"
+#include "BackendConsumer.h"
+
+using namespace llvm;
+
+LinkInModulesPass::LinkInModulesPass(clang::BackendConsumer *BC,
+ bool ShouldLinkFiles)
+ : BC(BC), ShouldLinkFiles(ShouldLinkFiles) {}
+
+PreservedAnalyses LinkInModulesPass::run(Module &M, ModuleAnalysisManager &AM) {
+
+ if (BC && BC->LinkInModules(&M, ShouldLinkFiles))
+ report_fatal_error("Bitcode module linking failed, compilation aborted!");
+
+ return PreservedAnalyses::all();
+}
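A rough sketch of how this pass might be scheduled, assuming an already-constructed clang::BackendConsumer; the helper below is hypothetical, not part of this patch:

    #include "LinkInModulesPass.h"
    #include "llvm/IR/PassManager.h"

    // Add the pass early so later passes see the fully linked module.
    static void addLinkInModulesPass(llvm::ModulePassManager &MPM,
                                     clang::BackendConsumer *BC) {
      MPM.addPass(LinkInModulesPass(BC, /*ShouldLinkFiles=*/true));
    }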
diff --git a/clang/lib/CodeGen/LinkInModulesPass.h b/clang/lib/CodeGen/LinkInModulesPass.h
new file mode 100644
index 000000000000..7fe94d625058
--- /dev/null
+++ b/clang/lib/CodeGen/LinkInModulesPass.h
@@ -0,0 +1,42 @@
+//===-- LinkInModulesPass.h - Module Linking pass ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides a pass to link in Modules from a provided
+/// BackendConsumer.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_LINKINMODULESPASS_H
+#define LLVM_BITCODE_LINKINMODULESPASS_H
+
+#include "BackendConsumer.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Module;
+class ModulePass;
+class Pass;
+
+/// Create and return a pass that links in Modules from a provided
+/// BackendConsumer to a given primary Module. Note that this pass is designed
+/// for use with the new pass manager.
+class LinkInModulesPass : public PassInfoMixin<LinkInModulesPass> {
+ clang::BackendConsumer *BC;
+ bool ShouldLinkFiles;
+
+public:
+ LinkInModulesPass(clang::BackendConsumer *BC, bool ShouldLinkFiles = true);
+
+ PreservedAnalyses run(Module &M, AnalysisManager<Module> &);
+ static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index a692abaf3b75..172c4c937b97 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -828,9 +828,9 @@ private:
/// Info on the global variable used to guard initialization of static locals.
/// The BitIndex field is only used for externally invisible declarations.
struct GuardInfo {
- GuardInfo() : Guard(nullptr), BitIndex(0) {}
- llvm::GlobalVariable *Guard;
- unsigned BitIndex;
+ GuardInfo() = default;
+ llvm::GlobalVariable *Guard = nullptr;
+ unsigned BitIndex = 0;
};
/// Map from DeclContext to the current guard variable. We assume that the
@@ -1235,7 +1235,6 @@ void MicrosoftCXXABI::initializeHiddenVirtualInheritanceMembers(
const VBOffsets &VBaseMap = Layout.getVBaseOffsetsMap();
CGBuilderTy &Builder = CGF.Builder;
- unsigned AS = getThisAddress(CGF).getAddressSpace();
llvm::Value *Int8This = nullptr; // Initialize lazily.
for (const CXXBaseSpecifier &S : RD->vbases()) {
@@ -1256,14 +1255,12 @@ void MicrosoftCXXABI::initializeHiddenVirtualInheritanceMembers(
VtorDispValue = Builder.CreateTruncOrBitCast(VtorDispValue, CGF.Int32Ty);
if (!Int8This)
- Int8This = Builder.CreateBitCast(getThisValue(CGF),
- CGF.Int8Ty->getPointerTo(AS));
+ Int8This = getThisValue(CGF);
+
llvm::Value *VtorDispPtr =
Builder.CreateInBoundsGEP(CGF.Int8Ty, Int8This, VBaseOffset);
// vtorDisp is always the 32-bits before the vbase in the class layout.
VtorDispPtr = Builder.CreateConstGEP1_32(CGF.Int8Ty, VtorDispPtr, -4);
- VtorDispPtr = Builder.CreateBitCast(
- VtorDispPtr, CGF.Int32Ty->getPointerTo(AS), "vtordisp.ptr");
Builder.CreateAlignedStore(VtorDispValue, VtorDispPtr,
CharUnits::fromQuantity(4));
@@ -1379,8 +1376,7 @@ llvm::GlobalValue::LinkageTypes MicrosoftCXXABI::getCXXDestructorLinkage(
case Dtor_Base:
// The base destructor most closely tracks the user-declared constructor, so
// we delegate back to the normal declarator case.
- return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage,
- /*IsConstantVariable=*/false);
+ return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage);
case Dtor_Complete:
// The complete destructor is like an inline function, but it may be
// imported and therefore must be exported as well. This requires changing
@@ -1528,7 +1524,7 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
auto *IsMostDerived = ImplicitParamDecl::Create(
Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(),
&Context.Idents.get("is_most_derived"), Context.IntTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
// The 'most_derived' parameter goes second if the ctor is variadic and last
// if it's not. Dtors can't be variadic.
const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
@@ -1541,7 +1537,7 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF,
auto *ShouldDelete = ImplicitParamDecl::Create(
Context, /*DC=*/nullptr, CGF.CurGD.getDecl()->getLocation(),
&Context.Idents.get("should_call_delete"), Context.IntTy,
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
Params.push_back(ShouldDelete);
getStructorImplicitParamDecl(CGF) = ShouldDelete;
}
@@ -1569,14 +1565,9 @@ void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) {
if (!CGF.CurFuncIsThunk && MD->isVirtual()) {
CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(CGF.CurGD);
if (!Adjustment.isZero()) {
- unsigned AS = cast<llvm::PointerType>(This->getType())->getAddressSpace();
- llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS),
- *thisTy = This->getType();
- This = CGF.Builder.CreateBitCast(This, charPtrTy);
assert(Adjustment.isPositive());
This = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, This,
-Adjustment.getQuantity());
- This = CGF.Builder.CreateBitCast(This, thisTy, "this.adjusted");
}
}
setCXXABIThisValue(CGF, This);
@@ -1682,7 +1673,11 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo &Info,
const CXXRecordDecl *RD,
llvm::GlobalVariable *VTable) {
- if (!CGM.getCodeGenOpts().LTOUnit)
+ // Emit type metadata on vtables with LTO or IR instrumentation.
+ // In IR instrumentation, the type metadata can be used to identify vtable
+ // definitions (for type profiling) among all global variables.
+ if (!CGM.getCodeGenOpts().LTOUnit &&
+ !CGM.getCodeGenOpts().hasProfileIRInstr())
return;
// TODO: Should VirtualFunctionElimination also be supported here?
@@ -1897,9 +1892,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
llvm::Comdat *C = nullptr;
if (!VFTableComesFromAnotherTU &&
- (llvm::GlobalValue::isWeakForLinker(VFTableLinkage) ||
- (llvm::GlobalValue::isLocalLinkage(VFTableLinkage) &&
- VTableAliasIsRequred)))
+ llvm::GlobalValue::isWeakForLinker(VFTableLinkage))
C = CGM.getModule().getOrInsertComdat(VFTableName.str());
// Only insert a pointer into the VFTable for RTTI data if we are not
@@ -2273,7 +2266,6 @@ MicrosoftCXXABI::performReturnAdjustment(CodeGenFunction &CGF, Address Ret,
if (RA.isEmpty())
return Ret.getPointer();
- auto OrigTy = Ret.getType();
Ret = Ret.withElementType(CGF.Int8Ty);
llvm::Value *V = Ret.getPointer();
@@ -2290,8 +2282,7 @@ MicrosoftCXXABI::performReturnAdjustment(CodeGenFunction &CGF, Address Ret,
if (RA.NonVirtual)
V = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, V, RA.NonVirtual);
- // Cast back to the original type.
- return CGF.Builder.CreateBitCast(V, OrigTy);
+ return V;
}
bool MicrosoftCXXABI::requiresArrayCookie(const CXXDeleteExpr *expr,
@@ -2512,9 +2503,6 @@ LValue MicrosoftCXXABI::EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF,
llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD);
llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType());
- unsigned AS = cast<llvm::PointerType>(V->getType())->getAddressSpace();
- V = CGF.Builder.CreateBitCast(V, RealVarTy->getPointerTo(AS));
-
CharUnits Alignment = CGF.getContext().getDeclAlign(VD);
Address Addr(V, RealVarTy, Alignment);
@@ -2998,7 +2986,6 @@ MicrosoftCXXABI::EmitMemberFunctionPointer(const CXXMethodDecl *MD) {
NonVirtualBaseAdjustment -= getContext().getOffsetOfBaseWithVBPtr(RD);
// The rest of the fields are common with data member pointers.
- FirstField = llvm::ConstantExpr::getBitCast(FirstField, CGM.VoidPtrTy);
return EmitFullMemberPointer(FirstField, /*IsMemberFunction=*/true, RD,
NonVirtualBaseAdjustment, VBTableIndex);
}
@@ -3227,9 +3214,6 @@ llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress(
CodeGenFunction &CGF, const Expr *E, Address Base, llvm::Value *MemPtr,
const MemberPointerType *MPT) {
assert(MPT->isMemberDataPointer());
- unsigned AS = Base.getAddressSpace();
- llvm::Type *PType =
- CGF.ConvertTypeForMem(MPT->getPointeeType())->getPointerTo(AS);
CGBuilderTy &Builder = CGF.Builder;
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
MSInheritanceModel Inheritance = RD->getMSInheritanceModel();
@@ -3257,16 +3241,9 @@ llvm::Value *MicrosoftCXXABI::EmitMemberDataPointerAddress(
Addr = Base.getPointer();
}
- // Cast to char*.
- Addr = Builder.CreateBitCast(Addr, CGF.Int8Ty->getPointerTo(AS));
-
// Apply the offset, which we assume is non-null.
- Addr = Builder.CreateInBoundsGEP(CGF.Int8Ty, Addr, FieldOffset,
+ return Builder.CreateInBoundsGEP(CGF.Int8Ty, Addr, FieldOffset,
"memptr.offset");
-
- // Cast the address to the appropriate pointer type, adopting the address
- // space of the base pointer.
- return Builder.CreateBitCast(Addr, PType);
}
llvm::Value *
@@ -3523,8 +3500,6 @@ CGCallee MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer(
const FunctionProtoType *FPT =
MPT->getPointeeType()->castAs<FunctionProtoType>();
const CXXRecordDecl *RD = MPT->getMostRecentCXXRecordDecl();
- llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(
- CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr));
CGBuilderTy &Builder = CGF.Builder;
MSInheritanceModel Inheritance = RD->getMSInheritanceModel();
@@ -3554,16 +3529,10 @@ CGCallee MicrosoftCXXABI::EmitLoadOfMemberFunctionPointer(
ThisPtrForCall = This.getPointer();
}
- if (NonVirtualBaseAdjustment) {
- // Apply the adjustment and cast back to the original struct type.
- llvm::Value *Ptr = Builder.CreateBitCast(ThisPtrForCall, CGF.Int8PtrTy);
- Ptr = Builder.CreateInBoundsGEP(CGF.Int8Ty, Ptr, NonVirtualBaseAdjustment);
- ThisPtrForCall = Builder.CreateBitCast(Ptr, ThisPtrForCall->getType(),
- "this.adjusted");
- }
+ if (NonVirtualBaseAdjustment)
+ ThisPtrForCall = Builder.CreateInBoundsGEP(CGF.Int8Ty, ThisPtrForCall,
+ NonVirtualBaseAdjustment);
- FunctionPointer =
- Builder.CreateBitCast(FunctionPointer, FTy->getPointerTo());
CGCallee Callee(FPT, FunctionPointer);
return Callee;
}
@@ -3674,14 +3643,17 @@ uint32_t MSRTTIClass::initialize(const MSRTTIClass *Parent,
static llvm::GlobalValue::LinkageTypes getLinkageForRTTI(QualType Ty) {
switch (Ty->getLinkage()) {
- case NoLinkage:
- case InternalLinkage:
- case UniqueExternalLinkage:
+ case Linkage::Invalid:
+ llvm_unreachable("Linkage hasn't been computed!");
+
+ case Linkage::None:
+ case Linkage::Internal:
+ case Linkage::UniqueExternal:
return llvm::GlobalValue::InternalLinkage;
- case VisibleNoLinkage:
- case ModuleLinkage:
- case ExternalLinkage:
+ case Linkage::VisibleNone:
+ case Linkage::Module:
+ case Linkage::External:
return llvm::GlobalValue::LinkOnceODRLinkage;
}
llvm_unreachable("Invalid linkage!");
@@ -4008,7 +3980,7 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) {
// Check to see if we've already declared this TypeDescriptor.
if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
- return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
+ return GV;
// Note for the future: If we would ever like to do deferred emission of
// RTTI, check if emitting vtables opportunistically need any adjustment.
@@ -4034,7 +4006,7 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) {
MangledName);
if (Var->isWeakForLinker())
Var->setComdat(CGM.getModule().getOrInsertComdat(Var->getName()));
- return llvm::ConstantExpr::getBitCast(Var, CGM.Int8PtrTy);
+ return Var;
}
/// Gets or creates a Microsoft CompleteObjectLocator.
@@ -4118,7 +4090,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
&getContext().Idents.get("src"),
getContext().getLValueReferenceType(RecordTy,
/*SpelledAsLValue=*/true),
- ImplicitParamDecl::Other);
+ ImplicitParamKind::Other);
if (IsCopy)
FunctionArgs.push_back(&SrcParam);
@@ -4128,7 +4100,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
ImplicitParamDecl IsMostDerived(getContext(), /*DC=*/nullptr,
SourceLocation(),
&getContext().Idents.get("is_most_derived"),
- getContext().IntTy, ImplicitParamDecl::Other);
+ getContext().IntTy, ImplicitParamKind::Other);
// Only add the parameter to the list if the class has virtual bases.
if (RD->getNumVBases() > 0)
FunctionArgs.push_back(&IsMostDerived);
@@ -4227,8 +4199,6 @@ llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T,
CopyCtor = getAddrOfCXXCtorClosure(CD, Ctor_CopyingClosure);
else
CopyCtor = CGM.getAddrOfCXXStructor(GlobalDecl(CD, Ctor_Complete));
-
- CopyCtor = llvm::ConstantExpr::getBitCast(CopyCtor, CGM.Int8PtrTy);
} else {
CopyCtor = llvm::Constant::getNullValue(CGM.Int8PtrTy);
}
@@ -4438,14 +4408,11 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
if (CXXDestructorDecl *DtorD = RD->getDestructor())
if (!DtorD->isTrivial())
- CleanupFn = llvm::ConstantExpr::getBitCast(
- CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete)),
- CGM.Int8PtrTy);
+ CleanupFn = CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete));
// This is unused as far as we can tell, initialize it to null.
llvm::Constant *ForwardCompat =
getImageRelativeConstant(llvm::Constant::getNullValue(CGM.Int8PtrTy));
- llvm::Constant *PointerToCatchableTypes = getImageRelativeConstant(
- llvm::ConstantExpr::getBitCast(CTA, CGM.Int8PtrTy));
+ llvm::Constant *PointerToCatchableTypes = getImageRelativeConstant(CTA);
llvm::StructType *TIType = getThrowInfoType();
llvm::Constant *Fields[] = {
llvm::ConstantInt::get(CGM.IntTy, Flags), // Flags
@@ -4479,7 +4446,7 @@ void MicrosoftCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) {
// Call into the runtime to throw the exception.
llvm::Value *Args[] = {
- CGF.Builder.CreateBitCast(AI.getPointer(), CGM.Int8PtrTy),
+ AI.getPointer(),
TI
};
CGF.EmitNoreturnRuntimeCallOrInvoke(getThrowFn(), Args);
diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index 114a9c1e2eac..ee543e40b460 100644
--- a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -160,6 +160,9 @@ public:
LangOpts.CurrentModule.empty() ? MainFileName : LangOpts.CurrentModule;
CodeGenOpts.setDebugInfo(llvm::codegenoptions::FullDebugInfo);
CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning());
+ CodeGenOpts.DwarfVersion = CI.getCodeGenOpts().DwarfVersion;
+ CodeGenOpts.DebugCompilationDir =
+ CI.getInvocation().getCodeGenOpts().DebugCompilationDir;
CodeGenOpts.DebugPrefixMap =
CI.getInvocation().getCodeGenOpts().DebugPrefixMap;
CodeGenOpts.DebugStrictDwarf = CI.getCodeGenOpts().DebugStrictDwarf;
diff --git a/clang/lib/CodeGen/SwiftCallingConv.cpp b/clang/lib/CodeGen/SwiftCallingConv.cpp
index 055dd3704386..16fbf52a517d 100644
--- a/clang/lib/CodeGen/SwiftCallingConv.cpp
+++ b/clang/lib/CodeGen/SwiftCallingConv.cpp
@@ -409,9 +409,10 @@ void SwiftAggLowering::splitVectorEntry(unsigned index) {
CharUnits begin = Entries[index].Begin;
for (unsigned i = 0; i != numElts; ++i) {
- Entries[index].Type = eltTy;
- Entries[index].Begin = begin;
- Entries[index].End = begin + eltSize;
+ unsigned idx = index + i;
+ Entries[idx].Type = eltTy;
+ Entries[idx].Begin = begin;
+ Entries[idx].End = begin + eltSize;
begin += eltSize;
}
}
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 3d79f92137ab..60224d458f6a 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -137,7 +137,7 @@ llvm::Value *TargetCodeGenInfo::performAddrSpaceCast(
if (auto *C = dyn_cast<llvm::Constant>(Src))
return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy);
// Try to preserve the source's name to make IR more readable.
- return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ return CGF.Builder.CreateAddrSpaceCast(
Src, DestTy, Src->hasName() ? Src->getName() + ".ascast" : "");
}
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 14ed5e5d2d2c..0c0781a2d5ab 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -81,6 +81,9 @@ public:
CodeGen::CodeGenModule &CGM,
const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {}
+ /// Provides a convenient hook to handle extra target-specific globals.
+ virtual void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const {}
+
/// Any further codegen related checks that need to be done on a function call
/// in a target specific manner.
virtual void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 561110ff8c0d..be5145daa00b 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -185,7 +185,7 @@ ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const {
assert(Ty->isVectorType() && "expected vector type!");
const auto *VT = Ty->castAs<VectorType>();
- if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) {
+ if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
BuiltinType::UChar &&
@@ -194,7 +194,7 @@ ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const {
llvm::Type::getInt1Ty(getVMContext()), 16));
}
- if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector) {
+ if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
const auto *BT = VT->getElementType()->castAs<BuiltinType>();
@@ -323,12 +323,11 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
return ABIArgInfo::getDirect(
llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
- // For alignment adjusted HFAs, cap the argument alignment to 16, leave it
- // default otherwise.
+ // For HFAs/HVAs, cap the argument alignment to 16, otherwise
+ // set it to 8 according to the AAPCS64 document.
unsigned Align =
getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
- unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity();
- Align = (Align > BaseAlign && Align >= 16) ? 16 : 0;
+ Align = (Align >= 16) ? 16 : 8;
return ABIArgInfo::getDirect(
llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
nullptr, true, Align);
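As an illustration of the revised rule (a sketch, not from the patch):

    // A plain HFA of four floats: its argument alignment is now set to 8.
    struct HFA { float A, B, C, D; };
    // An over-aligned HFA: the argument alignment is capped at 16, not 32.
    struct alignas(32) BigHFA { float A, B, C, D; };

    float Sum(BigHFA V) { return V.A + V.B + V.C + V.D; }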
@@ -369,8 +368,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
return ABIArgInfo::getIgnore();
if (const auto *VT = RetTy->getAs<VectorType>()) {
- if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector ||
- VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
+ if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
+ VT->getVectorKind() == VectorKind::SveFixedLengthPredicate)
return coerceIllegalVector(RetTy);
}
@@ -444,8 +443,8 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
// Check whether VT is a fixed-length SVE vector. These types are
// represented as scalable vectors in function args/return and must be
// coerced from fixed vectors.
- if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector ||
- VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
+ if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
+ VT->getVectorKind() == VectorKind::SveFixedLengthPredicate)
return true;
// Check whether VT is legal.
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 796a2be81a09..03ac6b78598f 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -8,6 +8,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
using namespace clang;
using namespace clang::CodeGen;
@@ -248,6 +249,12 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
return ABIArgInfo::getDirect();
}
}
+
+ // Use pass-by-reference instead of pass-by-value for struct arguments in
+ // the function ABI.
+ return ABIArgInfo::getIndirectAliased(
+ getContext().getTypeAlignInChars(Ty),
+ getContext().getTargetAddressSpace(LangAS::opencl_private));
}
// Otherwise just do the default thing.
@@ -268,6 +275,8 @@ public:
void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
CodeGenModule &CGM) const;
+ void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;
+
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
unsigned getOpenCLKernelCallingConv() const override;
@@ -299,12 +308,13 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D,
if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
return false;
- return D->hasAttr<OpenCLKernelAttr>() ||
- (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
- (isa<VarDecl>(D) &&
- (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
- cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
- cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType()));
+ return !D->hasAttr<OMPDeclareTargetDeclAttr>() &&
+ (D->hasAttr<OpenCLKernelAttr>() ||
+ (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
+ (isa<VarDecl>(D) &&
+ (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
+ cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())));
}
void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
@@ -317,26 +327,7 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
if (ReqdWGS || FlatWGS) {
- unsigned Min = 0;
- unsigned Max = 0;
- if (FlatWGS) {
- Min = FlatWGS->getMin()
- ->EvaluateKnownConstInt(M.getContext())
- .getExtValue();
- Max = FlatWGS->getMax()
- ->EvaluateKnownConstInt(M.getContext())
- .getExtValue();
- }
- if (ReqdWGS && Min == 0 && Max == 0)
- Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
-
- if (Min != 0) {
- assert(Min <= Max && "Min must be less than or equal Max");
-
- std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
- F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
- } else
- assert(Max == 0 && "Max must be zero");
+ M.handleAMDGPUFlatWorkGroupSizeAttr(F, FlatWGS, ReqdWGS);
} else if (IsOpenCLKernel || IsHIPKernel) {
// By default, restrict the maximum size to a value specified by
// --gpu-max-threads-per-block=n or its default value for HIP.
@@ -349,24 +340,8 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
}
- if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
- unsigned Min =
- Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue();
- unsigned Max = Attr->getMax() ? Attr->getMax()
- ->EvaluateKnownConstInt(M.getContext())
- .getExtValue()
- : 0;
-
- if (Min != 0) {
- assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");
-
- std::string AttrVal = llvm::utostr(Min);
- if (Max != 0)
- AttrVal = AttrVal + "," + llvm::utostr(Max);
- F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
- } else
- assert(Max == 0 && "Max must be zero");
- }
+ if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>())
+ M.handleAMDGPUWavesPerEUAttr(F, Attr);
if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
unsigned NumSGPR = Attr->getNumSGPR();
@@ -383,6 +358,40 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
}
}
+/// Emits control constants used to change per-architecture behaviour in the
+/// AMDGPU ROCm device libraries.
+void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
+ CodeGen::CodeGenModule &CGM) const {
+ StringRef Name = "__oclc_ABI_version";
+ llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
+ if (OriginalGV &&
+     !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
+ return;
+
+ if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
+ llvm::CodeObjectVersionKind::COV_None)
+ return;
+
+ auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
+ llvm::Constant *COV = llvm::ConstantInt::get(
+ Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);
+
+ // It needs to be constant weak_odr without externally_initialized so that
+ // the load instruction can be eliminated by IPSCCP.
+ auto *GV = new llvm::GlobalVariable(
+ CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
+ nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
+ GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);
+
+ // Replace any external references to this variable with the new global.
+ if (OriginalGV) {
+ OriginalGV->replaceAllUsesWith(GV);
+ GV->takeName(OriginalGV);
+ OriginalGV->eraseFromParent();
+ }
+}
+
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (requiresAMDGPUProtectedVisibility(D, GV)) {
@@ -401,13 +410,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
if (FD)
setFunctionDeclAttributes(FD, F, M);
- const bool IsHIPKernel =
- M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
-
- // TODO: This should be moved to language specific attributes instead.
- if (IsHIPKernel)
- F->addFnAttr("uniform-work-group-size", "true");
-
if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
@@ -449,12 +451,11 @@ AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
return DefaultGlobalAS;
LangAS AddrSpace = D->getType().getAddressSpace();
- assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace));
if (AddrSpace != LangAS::Default)
return AddrSpace;
// Only promote to address space 4 if VarDecl has constant initialization.
- if (CGM.isTypeConstant(D->getType(), false, false) &&
+ if (D->getType().isConstantStorage(CGM.getContext(), false, false) &&
D->hasConstantInitialization()) {
if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
return *ConstAS;
@@ -470,20 +471,25 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
std::string Name;
switch (Scope) {
case SyncScope::HIPSingleThread:
+ case SyncScope::SingleScope:
Name = "singlethread";
break;
case SyncScope::HIPWavefront:
case SyncScope::OpenCLSubGroup:
+ case SyncScope::WavefrontScope:
Name = "wavefront";
break;
case SyncScope::HIPWorkgroup:
case SyncScope::OpenCLWorkGroup:
+ case SyncScope::WorkgroupScope:
Name = "workgroup";
break;
case SyncScope::HIPAgent:
case SyncScope::OpenCLDevice:
+ case SyncScope::DeviceScope:
Name = "agent";
break;
+ case SyncScope::SystemScope:
case SyncScope::HIPSystem:
case SyncScope::OpenCLAllSVMDevices:
Name = "";
@@ -595,6 +601,53 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
return F;
}
+void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr(
+ llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *FlatWGS,
+ const ReqdWorkGroupSizeAttr *ReqdWGS, int32_t *MinThreadsVal,
+ int32_t *MaxThreadsVal) {
+ unsigned Min = 0;
+ unsigned Max = 0;
+ if (FlatWGS) {
+ Min = FlatWGS->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
+ Max = FlatWGS->getMax()->EvaluateKnownConstInt(getContext()).getExtValue();
+ }
+ if (ReqdWGS && Min == 0 && Max == 0)
+ Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
+
+ if (Min != 0) {
+ assert(Min <= Max && "Min must be less than or equal to Max");
+
+ if (MinThreadsVal)
+ *MinThreadsVal = Min;
+ if (MaxThreadsVal)
+ *MaxThreadsVal = Max;
+ std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
+ if (F)
+ F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
+ } else
+ assert(Max == 0 && "Max must be zero");
+}
+
+void CodeGenModule::handleAMDGPUWavesPerEUAttr(
+ llvm::Function *F, const AMDGPUWavesPerEUAttr *Attr) {
+ unsigned Min =
+ Attr->getMin()->EvaluateKnownConstInt(getContext()).getExtValue();
+ unsigned Max =
+ Attr->getMax()
+ ? Attr->getMax()->EvaluateKnownConstInt(getContext()).getExtValue()
+ : 0;
+
+ if (Min != 0) {
+ assert((Max == 0 || Min <= Max) && "Min must be less than or equal to Max");
+
+ std::string AttrVal = llvm::utostr(Min);
+ if (Max != 0)
+ AttrVal = AttrVal + "," + llvm::utostr(Max);
+ F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
+ } else
+ assert(Max == 0 && "Max must be zero");
+}
+
std::unique_ptr<TargetCodeGenInfo>
CodeGen::createAMDGPUTargetCodeGenInfo(CodeGenModule &CGM) {
return std::make_unique<AMDGPUTargetCodeGenInfo>(CGM.getTypes());
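A hypothetical HIP-style kernel using the attributes these new CodeGenModule helpers encode (requires an amdgcn target; sketch only):

    __global__ void
    __attribute__((amdgpu_flat_work_group_size(64, 256),
                   amdgpu_waves_per_eu(2, 4)))
    kernel() {
      // Lowered to the "amdgpu-flat-work-group-size"="64,256" and
      // "amdgpu-waves-per-eu"="2,4" function attributes.
    }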
diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp
index 6391a8aeaa67..7b2c31139b0b 100644
--- a/clang/lib/CodeGen/Targets/LoongArch.cpp
+++ b/clang/lib/CodeGen/Targets/LoongArch.cpp
@@ -148,6 +148,13 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper(
if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) {
uint64_t ArraySize = ATy->getSize().getZExtValue();
QualType EltTy = ATy->getElementType();
+ // Non-zero-length arrays of empty records make the struct ineligible to be
+ // passed via FARs in C++.
+ if (const auto *RTy = EltTy->getAs<RecordType>()) {
+ if (ArraySize != 0 && isa<CXXRecordDecl>(RTy->getDecl()) &&
+ isEmptyRecord(getContext(), EltTy, true, true))
+ return false;
+ }
CharUnits EltSize = getContext().getTypeSizeInChars(EltTy);
for (uint64_t i = 0; i < ArraySize; ++i) {
if (!detectFARsEligibleStructHelper(EltTy, CurOff, Field1Ty, Field1Off,
@@ -163,10 +170,11 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper(
// copy constructor are not eligible for the FP calling convention.
if (getRecordArgABI(Ty, CGT.getCXXABI()))
return false;
- if (isEmptyRecord(getContext(), Ty, true))
- return true;
const RecordDecl *RD = RTy->getDecl();
- // Unions aren't eligible unless they're empty (which is caught above).
+ if (isEmptyRecord(getContext(), Ty, true, true) &&
+ (!RD->isUnion() || !isa<CXXRecordDecl>(RD)))
+ return true;
+ // Unions aren't eligible unless they're empty in C (which is caught above).
if (RD->isUnion())
return false;
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
@@ -222,6 +230,8 @@ bool LoongArchABIInfo::detectFARsEligibleStruct(
if (!detectFARsEligibleStructHelper(Ty, CharUnits::Zero(), Field1Ty,
Field1Off, Field2Ty, Field2Off))
return false;
+ if (!Field1Ty)
+ return false;
// Not really a candidate if we have a single int but no float.
if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy())
return false;
@@ -299,12 +309,14 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
CGCXXABI::RAA_DirectInMemory);
}
- // Ignore empty structs/unions.
- if (isEmptyRecord(getContext(), Ty, true))
- return ABIArgInfo::getIgnore();
-
uint64_t Size = getContext().getTypeSize(Ty);
+ // Ignore an empty struct or union whose size is zero, e.g. `struct { }` in C
+ // or `struct { int a[0]; }` in C++. In C++, `struct { }` is empty but its
+ // size is 1 byte and g++ doesn't ignore it; clang++ matches this behaviour.
+ if (isEmptyRecord(getContext(), Ty, true) && Size == 0)
+ return ABIArgInfo::getIgnore();
+
// Pass floating point values via FARs if possible.
if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() &&
FRLen >= Size && FARsLeft) {
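The sizes involved can be checked directly (illustrative C++; int a[0] is a GNU extension):

    #include <cstdio>

    struct Empty {};              // size 1 in C++; no longer ignored
    struct ZeroLen { int A[0]; }; // size 0; still ignored

    int main() {
      std::printf("%zu %zu\n", sizeof(Empty), sizeof(ZeroLen)); // "1 0"
    }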
@@ -312,6 +324,13 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
return ABIArgInfo::getDirect();
}
+ // Pass 128-bit/256-bit vector values via vector registers directly.
+ if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) &&
+ (getTarget().hasFeature("lsx"))) ||
+ ((getContext().getTypeSize(Ty) == 256) &&
+ getTarget().hasFeature("lasx"))))
+ return ABIArgInfo::getDirect();
+
// Complex types for the *f or *d ABI must be passed directly rather than
// using CoerceAndExpand.
if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) {
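For illustration, a vector argument that is now passed directly in a vector register, assuming compilation with -mlsx (sketch only):

    typedef int V4I32 __attribute__((vector_size(16))); // 128-bit vector

    V4I32 AddV(V4I32 A, V4I32 B) { return A + B; }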
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 1ca0192333a0..d0dc7c258a03 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -71,12 +71,12 @@ public:
return true;
}
-private:
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand);
+private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
LValue Src) {
llvm::Value *Handle = nullptr;
@@ -256,24 +256,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// Create !{<func-ref>, metadata !"kernel", i32 1} node
addNVVMMetadata(F, "kernel", 1);
}
- if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
- // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
- llvm::APSInt MaxThreads(32);
- MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
- if (MaxThreads > 0)
- addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
-
- // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
- // not specified in __launch_bounds__ or if the user specified a 0 value,
- // we don't have to add a PTX directive.
- if (Attr->getMinBlocks()) {
- llvm::APSInt MinBlocks(32);
- MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
- if (MinBlocks > 0)
- // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
- addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
- }
- }
+ if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
+ M.handleCUDALaunchBoundsAttr(F, Attr);
}
// Attach kernel metadata directly if compiling for NVPTX.
@@ -303,6 +287,55 @@ bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
}
}
+void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
+ const CUDALaunchBoundsAttr *Attr,
+ int32_t *MaxThreadsVal,
+ int32_t *MinBlocksVal,
+ int32_t *MaxClusterRankVal) {
+ // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
+ llvm::APSInt MaxThreads(32);
+ MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
+ if (MaxThreads > 0) {
+ if (MaxThreadsVal)
+ *MaxThreadsVal = MaxThreads.getExtValue();
+ if (F) {
+ // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
+ NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
+ MaxThreads.getExtValue());
+ }
+ }
+
+ // min and max blocks are optional arguments for CUDALaunchBoundsAttr. If
+ // they were not specified in __launch_bounds__ or if the user specified a 0
+ // value, we don't have to add a PTX directive.
+ if (Attr->getMinBlocks()) {
+ llvm::APSInt MinBlocks(32);
+ MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
+ if (MinBlocks > 0) {
+ if (MinBlocksVal)
+ *MinBlocksVal = MinBlocks.getExtValue();
+ if (F) {
+ // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
+ NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
+ MinBlocks.getExtValue());
+ }
+ }
+ }
+ if (Attr->getMaxBlocks()) {
+ llvm::APSInt MaxBlocks(32);
+ MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
+ if (MaxBlocks > 0) {
+ if (MaxClusterRankVal)
+ *MaxClusterRankVal = MaxBlocks.getExtValue();
+ if (F) {
+ // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
+ NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
+ MaxBlocks.getExtValue());
+ }
+ }
+ }
+}
+
std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
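A hypothetical CUDA kernel exercising the optional third __launch_bounds__ argument that the new maxclusterrank path handles (requires CUDA mode and an sm_90-class target; sketch only):

    // Lowered to "maxntidx"=256, "minctasm"=2 and "maxclusterrank"=4
    // NVVM annotations via handleCUDALaunchBoundsAttr.
    __global__ void __launch_bounds__(256, 2, 4) k() {}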
diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp
index 9cdd2aa07791..40dddde508c1 100644
--- a/clang/lib/CodeGen/Targets/PPC.cpp
+++ b/clang/lib/CodeGen/Targets/PPC.cpp
@@ -431,7 +431,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
llvm::Type *DirectTy = CGF.ConvertType(Ty), *ElementTy = DirectTy;
if (isIndirect)
- DirectTy = llvm::PointerType::getUnqual(CGF.getLLVMContext());
+ DirectTy = CGF.UnqualPtrTy;
// Case 1: consume registers.
Address RegAddr = Address::invalid();
@@ -620,6 +620,9 @@ public:
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
+ void emitTargetMetadata(CodeGen::CodeGenModule &CGM,
+ const llvm::MapVector<GlobalDecl, StringRef>
+ &MangledDeclNames) const override;
};
class PPC64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -940,6 +943,24 @@ PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable(
/*IsAIX*/ false);
}
+void PPC64_SVR4_TargetCodeGenInfo::emitTargetMetadata(
+ CodeGen::CodeGenModule &CGM,
+ const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {
+ if (CGM.getTypes().isLongDoubleReferenced()) {
+ llvm::LLVMContext &Ctx = CGM.getLLVMContext();
+ const auto *flt = &CGM.getTarget().getLongDoubleFormat();
+ if (flt == &llvm::APFloat::PPCDoubleDouble())
+ CGM.getModule().addModuleFlag(llvm::Module::Error, "float-abi",
+ llvm::MDString::get(Ctx, "doubledouble"));
+ else if (flt == &llvm::APFloat::IEEEquad())
+ CGM.getModule().addModuleFlag(llvm::Module::Error, "float-abi",
+ llvm::MDString::get(Ctx, "ieeequad"));
+ else if (flt == &llvm::APFloat::IEEEdouble())
+ CGM.getModule().addModuleFlag(llvm::Module::Error, "float-abi",
+ llvm::MDString::get(Ctx, "ieeedouble"));
+ }
+}
+
bool
PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
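Any reference to long double is enough to trigger the new module flag (illustrative only):

    // Marks the module as referencing long double; the selected format is
    // then recorded as the "float-abi" module flag: "doubledouble",
    // "ieeequad", or "ieeedouble".
    long double Scale(long double X) { return X * 2.0L; }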
diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp
index b6d8ae462675..1e1d249b37ac 100644
--- a/clang/lib/CodeGen/Targets/RISCV.cpp
+++ b/clang/lib/CodeGen/Targets/RISCV.cpp
@@ -8,7 +8,6 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
-#include "llvm/TargetParser/RISCVTargetParser.h"
using namespace clang;
using namespace clang::CodeGen;
@@ -152,6 +151,13 @@ bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) {
uint64_t ArraySize = ATy->getSize().getZExtValue();
QualType EltTy = ATy->getElementType();
+ // Non-zero-length arrays of empty records make the struct ineligible for
+ // the FP calling convention in C++.
+ if (const auto *RTy = EltTy->getAs<RecordType>()) {
+ if (ArraySize != 0 && isa<CXXRecordDecl>(RTy->getDecl()) &&
+ isEmptyRecord(getContext(), EltTy, true, true))
+ return false;
+ }
CharUnits EltSize = getContext().getTypeSizeInChars(EltTy);
for (uint64_t i = 0; i < ArraySize; ++i) {
bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty,
@@ -168,7 +174,7 @@ bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
// copy constructor are not eligible for the FP calling convention.
if (getRecordArgABI(Ty, CGT.getCXXABI()))
return false;
- if (isEmptyRecord(getContext(), Ty, true))
+ if (isEmptyRecord(getContext(), Ty, true, true))
return true;
const RecordDecl *RD = RTy->getDecl();
// Unions aren't eligible unless they're empty (which is caught above).
@@ -238,6 +244,8 @@ bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
NeededArgFPRs = 0;
bool IsCandidate = detectFPCCEligibleStructHelper(
Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off);
+ if (!Field1Ty)
+ return false;
// Not really a candidate if we have a single int but no float.
if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy())
return false;
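A sketch of the degenerate case the new guard appears to cover (an assumption based on the surrounding code: a zero-sized aggregate can let the helper succeed without ever recording a field):

struct Z {
  int zero[0];                   // GNU zero-length array: contributes nothing
};
// detectFPCCEligibleStructHelper loops zero times and reports success, but
// Field1Ty is still null; the existing "single int but no float" check does
// not fire on a null Field1Ty, so without the early return a null field
// type could flow into the coercion logic downstream.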
@@ -310,16 +318,20 @@ ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const {
assert(Ty->isVectorType() && "expected vector type!");
const auto *VT = Ty->castAs<VectorType>();
- assert(VT->getVectorKind() == VectorType::RVVFixedLengthDataVector &&
+ assert(VT->getVectorKind() == VectorKind::RVVFixedLengthData &&
"Unexpected vector kind");
assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
- const auto *BT = VT->getElementType()->castAs<BuiltinType>();
- unsigned EltSize = getContext().getTypeSize(BT);
+ auto VScale =
+ getContext().getTargetInfo().getVScaleRange(getContext().getLangOpts());
+ // The MinNumElts is simplified from the equation:
+ // NumElts / VScale =
+ // (EltSize * NumElts / (VScale * RVVBitsPerBlock))
+ // * (RVVBitsPerBlock / EltSize)
llvm::ScalableVectorType *ResType =
- llvm::ScalableVectorType::get(CGT.ConvertType(VT->getElementType()),
- llvm::RISCV::RVVBitsPerBlock / EltSize);
+ llvm::ScalableVectorType::get(CGT.ConvertType(VT->getElementType()),
+ VT->getNumElements() / VScale->first);
return ABIArgInfo::getDirect(ResType);
}
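A worked instance of the MinNumElts algebra, with assumed numbers (RVVBitsPerBlock = 64 as in llvm::RISCV, and VLEN fixed to 256 bits so vscale = 4):

#include <cassert>

int main() {
  const unsigned RVVBitsPerBlock = 64; // bits represented by vscale == 1
  const unsigned VLen = 256;           // assumed -mrvv-vector-bits value
  const unsigned VScale = VLen / RVVBitsPerBlock;   // 4
  const unsigned EltSize = 32, NumElts = 16;        // fixed <16 x i32>, LMUL 2
  // New formula: NumElts / VScale -> <vscale x 4 x i32>.
  assert(NumElts / VScale == 4);
  // Old formula ignored LMUL: RVVBitsPerBlock / EltSize == 2, which would
  // have produced <vscale x 2 x i32> and dropped half the elements.
  assert(RVVBitsPerBlock / EltSize == 2);
}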
@@ -419,7 +431,7 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
}
if (const VectorType *VT = Ty->getAs<VectorType>())
- if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector)
+ if (VT->getVectorKind() == VectorKind::RVVFixedLengthData)
return coerceVLSVector(Ty);
// Aggregates which are <= 2*XLen will be passed in registers if possible,
diff --git a/clang/lib/CodeGen/Targets/Sparc.cpp b/clang/lib/CodeGen/Targets/Sparc.cpp
index f5cafaa97315..a337a52a94ec 100644
--- a/clang/lib/CodeGen/Targets/Sparc.cpp
+++ b/clang/lib/CodeGen/Targets/Sparc.cpp
@@ -286,7 +286,7 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
CGBuilderTy &Builder = CGF.Builder;
Address Addr = Address(Builder.CreateLoad(VAListAddr, "ap.cur"),
getVAListElementType(CGF), SlotSize);
- llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy);
+ llvm::Type *ArgPtrTy = CGF.UnqualPtrTy;
auto TypeInfo = getContext().getTypeInfoInChars(Ty);
diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index 31679d899a44..2af240350438 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -87,12 +87,15 @@ static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) {
/// Similar to llvm::CCState, but for Clang.
struct CCState {
CCState(CGFunctionInfo &FI)
- : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {}
+ : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()),
+ Required(FI.getRequiredArgs()), IsDelegateCall(FI.isDelegateCall()) {}
llvm::SmallBitVector IsPreassigned;
unsigned CC = CallingConv::CC_C;
unsigned FreeRegs = 0;
unsigned FreeSSERegs = 0;
+ RequiredArgs Required;
+ bool IsDelegateCall = false;
};
/// X86_32ABIInfo - The X86-32 ABI information.
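CCState now records two extra facts consumed later in this patch: which arguments are required (fixed) rather than variadic, and whether the call is a delegate call. A simplified stand-in for the RequiredArgs query (hypothetical RequiredArgsSketch type; the real class is clang::CodeGen::RequiredArgs):

#include <cassert>

// Hypothetical stand-in: arguments below the required count are fixed
// parameters; anything past it arrived through a C variadic `...`.
struct RequiredArgsSketch {
  unsigned NumRequired;
  bool isRequiredArg(unsigned ArgIdx) const { return ArgIdx < NumRequired; }
};

int main() {
  RequiredArgsSketch R{2};        // e.g. void f(int, int, ...)
  assert(R.isRequiredArg(1));     // second fixed parameter
  assert(!R.isRequiredArg(2));    // first variadic argument
}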
@@ -140,7 +143,8 @@ class X86_32ABIInfo : public ABIInfo {
Class classify(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
- ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
+ ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State,
+ unsigned ArgIndex) const;
/// Updates the number of available free registers, returns
/// true if any registers were allocated.
@@ -737,8 +741,8 @@ void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) c
}
}
-ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
- CCState &State) const {
+ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State,
+ unsigned ArgIndex) const {
// FIXME: Set alignment on indirect arguments.
bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
@@ -753,6 +757,12 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
if (RAA == CGCXXABI::RAA_Indirect) {
return getIndirectResult(Ty, false, State);
+ } else if (State.IsDelegateCall) {
+ // Avoid having different alignments on delegate call args by always
+ // setting the alignment to 4, which is what we do for inallocas.
+ ABIArgInfo Res = getIndirectResult(Ty, false, State);
+ Res.setIndirectAlign(CharUnits::fromQuantity(4));
+ return Res;
} else if (RAA == CGCXXABI::RAA_DirectInMemory) {
// The field index doesn't matter, we'll fix it up later.
return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
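A hedged sketch of the kind of call the IsDelegateCall path targets (an assumption drawn from the comment above: a thunk that forwards an aggregate to the real implementation must agree with it on argument layout, hence the fixed 4-byte indirect alignment):

struct alignas(16) Payload { char buf[32]; };   // over-aligned aggregate
struct Base1 { virtual void f(Payload); };
struct Base2 { virtual void f(Payload); };
// In the MSVC ABI, Derived::f is reached from Base2 through a thunk that
// delegates to the primary definition; pinning the indirect alignment to 4
// keeps both call sites' frame layouts byte-compatible, matching inalloca.
struct Derived : Base1, Base2 { void f(Payload) override; };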
@@ -805,11 +815,12 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
}
llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;
- // Pass over-aligned aggregates on Windows indirectly. This behavior was
- // added in MSVC 2015. Use the required alignment from the record layout,
- // since that may be less than the regular type alignment, and types with
- // required alignment of less than 4 bytes are not passed indirectly.
- if (IsWin32StructABI) {
+ // Pass over-aligned aggregates to non-variadic functions on Windows
+ // indirectly. This behavior was added in MSVC 2015. Use the required
+ // alignment from the record layout, since that may be less than the
+ // regular type alignment, and types with required alignment of less than 4
+ // bytes are not passed indirectly.
+ if (IsWin32StructABI && State.Required.isRequiredArg(ArgIndex)) {
unsigned AlignInBits = 0;
if (RT) {
const ASTRecordLayout &Layout =
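Illustrative signatures for the tightened condition (a sketch, assuming an i686-windows-msvc target): only fixed, required arguments get the MSVC 2015 indirect convention, while the same type passed through `...` stays direct:

struct alignas(8) Over { int x; };   // required alignment > 4 bytes
void fixed(Over o);                  // required arg: passed indirectly
void variadic(int n, ...);           // Over passed via ...: not indirect
void demo(Over o) {
  fixed(o);                          // State.Required.isRequiredArg(0) -> true
  variadic(1, o);                    // argument index >= required count
}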
@@ -935,12 +946,13 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
bool UsedInAlloca = false;
MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
- for (int I = 0, E = Args.size(); I < E; ++I) {
+ for (unsigned I = 0, E = Args.size(); I < E; ++I) {
// Skip arguments that have already been assigned.
if (State.IsPreassigned.test(I))
continue;
- Args[I].info = classifyArgumentType(Args[I].type, State);
+ Args[I].info = classifyArgumentType(Args[I].type, State, I);
UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
}
@@ -1500,6 +1512,24 @@ static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
return false;
}
+static bool checkAVX512ParamFeature(DiagnosticsEngine &Diag,
+ SourceLocation CallLoc,
+ const llvm::StringMap<bool> &CallerMap,
+ const llvm::StringMap<bool> &CalleeMap,
+ QualType Ty, bool IsArgument) {
+ bool Caller256 = CallerMap.lookup("avx512f") && !CallerMap.lookup("evex512");
+ bool Callee256 = CalleeMap.lookup("avx512f") && !CalleeMap.lookup("evex512");
+
+ // Forbid passing or returning 512-bit or larger vectors when ZMM
+ // instructions are disabled.
+ if (Caller256 || Callee256)
+ return Diag.Report(CallLoc, diag::err_avx_calling_convention)
+ << IsArgument << Ty << "evex512";
+
+ return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
+ "avx512f", IsArgument);
+}
+
static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
SourceLocation CallLoc,
const llvm::StringMap<bool> &CallerMap,
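A hedged reproduction of the new diagnostic path (assumptions: an x86-64 target and that the target attribute accepts the no-evex512 spelling; any caller/callee where avx512f is enabled without evex512 should now report against "evex512"):

typedef double v8df __attribute__((vector_size(64)));  // 512-bit vector

// Callee compiled with AVX512 but 64-byte EVEX (ZMM) disabled.
__attribute__((target("avx512f,no-evex512")))
v8df callee(v8df x) { return x; }

// Passing or returning v8df here should hit err_avx_calling_convention
// with "evex512" named as the missing feature, instead of plain "avx512f".
__attribute__((target("avx512f,no-evex512")))
v8df caller(v8df x) { return callee(x); }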
@@ -1507,8 +1537,8 @@ static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
bool IsArgument) {
uint64_t Size = Ctx.getTypeSize(Ty);
if (Size > 256)
- return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
- "avx512f", IsArgument);
+ return checkAVX512ParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
+ IsArgument);
if (Size > 128)
return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",
@@ -2948,9 +2978,7 @@ static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF,
// AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
- llvm::Value *Res =
- CGF.Builder.CreateBitCast(overflow_arg_area,
- llvm::PointerType::getUnqual(LTy));
+ llvm::Value *Res = overflow_arg_area;
// AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
// l->overflow_arg_area + sizeof(type).
@@ -3053,8 +3081,6 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
llvm::Type *TyHi = ST->getElementType(1);
assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) &&
"Unexpected ABI info for mixed regs");
- llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo);
- llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi);
llvm::Value *GPAddr =
CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset);
llvm::Value *FPAddr =
@@ -3065,13 +3091,13 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
// Copy the first element.
// FIXME: Our choice of alignment here and below is probably pessimistic.
llvm::Value *V = CGF.Builder.CreateAlignedLoad(
- TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo),
+ TyLo, RegLoAddr,
CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyLo)));
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
// Copy the second element.
V = CGF.Builder.CreateAlignedLoad(
- TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi),
+ TyHi, RegHiAddr,
CharUnits::fromQuantity(getDataLayout().getABITypeAlign(TyHi)));
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
diff --git a/clang/lib/CodeGen/Targets/XCore.cpp b/clang/lib/CodeGen/Targets/XCore.cpp
index 8be240c018d0..aeb48f851e16 100644
--- a/clang/lib/CodeGen/Targets/XCore.cpp
+++ b/clang/lib/CodeGen/Targets/XCore.cpp
@@ -543,7 +543,7 @@ static bool appendArrayType(SmallStringEnc &Enc, QualType QT,
const ArrayType *AT,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC, StringRef NoSizeEnc) {
- if (AT->getSizeModifier() != ArrayType::Normal)
+ if (AT->getSizeModifier() != ArraySizeModifier::Normal)
return false;
Enc += "a(";
if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT))
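This XCore hunk, like the VectorKind changes in the RISC-V file above, is mechanical fallout from moving these enums out of their classes into scoped enums. A minimal sketch of the pattern (an assumed shape for illustration, not the actual clang declarations):

// Old style: enumerators nested in the class, spelled ArrayType::Normal.
// New style: a scoped enum shared at namespace level.
enum class ArraySizeModifier { Normal, Static, Star };

bool needsSizeEncoding(ArraySizeModifier M) {
  // The unqualified `Normal` no longer resolves; the qualified name is
  // required, which is exactly the textual change in the hunk above.
  return M != ArraySizeModifier::Normal;
}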