Diffstat (limited to 'lib/CodeGen/TargetInfo.cpp')
 lib/CodeGen/TargetInfo.cpp | 641
 1 file changed, 533 insertions(+), 108 deletions(-)
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index ece3a407eae3..4b8006428f8f 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -14,6 +14,7 @@
#include "TargetInfo.h"
#include "ABIInfo.h"
+#include "CGBlocks.h"
#include "CGCXXABI.h"
#include "CGValue.h"
#include "CodeGenFunction.h"
@@ -22,7 +23,9 @@
#include "clang/CodeGen/SwiftCallingConv.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"
@@ -420,18 +423,17 @@ llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &
return llvm::ConstantPointerNull::get(T);
}
-unsigned TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
- const VarDecl *D) const {
+LangAS TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
+ const VarDecl *D) const {
assert(!CGM.getLangOpts().OpenCL &&
!(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
"Address space agnostic languages only");
- return D ? D->getType().getAddressSpace()
- : static_cast<unsigned>(LangAS::Default);
+ return D ? D->getType().getAddressSpace() : LangAS::Default;
}
llvm::Value *TargetCodeGenInfo::performAddrSpaceCast(
- CodeGen::CodeGenFunction &CGF, llvm::Value *Src, unsigned SrcAddr,
- unsigned DestAddr, llvm::Type *DestTy, bool isNonNull) const {
+ CodeGen::CodeGenFunction &CGF, llvm::Value *Src, LangAS SrcAddr,
+ LangAS DestAddr, llvm::Type *DestTy, bool isNonNull) const {
  // Since the target may map different address spaces in the AST to the same
  // address space, an address space conversion may end up as a bitcast.
if (auto *C = dyn_cast<llvm::Constant>(Src))
@@ -441,13 +443,18 @@ llvm::Value *TargetCodeGenInfo::performAddrSpaceCast(
llvm::Constant *
TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src,
- unsigned SrcAddr, unsigned DestAddr,
+ LangAS SrcAddr, LangAS DestAddr,
llvm::Type *DestTy) const {
  // Since the target may map different address spaces in the AST to the same
  // address space, an address space conversion may end up as a bitcast.
return llvm::ConstantExpr::getPointerCast(Src, DestTy);
}
+llvm::SyncScope::ID
+TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const {
+ return C.getOrInsertSyncScopeID(""); /* default sync scope */
+}
+
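For orientation, a sketch (not part of the diff) of how CodeGen can consume the new hook when lowering a scoped fence; it assumes a CodeGenFunction &CGF and a clang::SyncScope S are in scope:

    llvm::SyncScope::ID SSID = CGF.CGM.getTargetCodeGenInfo().getLLVMSyncScopeID(
        S, CGF.getLLVMContext());
    // The default hook yields the "" (system) scope; targets such as AMDGPU
    // override it to return named scopes like "agent" (see further below).
    CGF.Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);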
static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
/// isEmptyField - Return true iff the field is "empty", that is, it
@@ -869,7 +876,10 @@ bool IsX86_MMXType(llvm::Type *IRType) {
static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
StringRef Constraint,
llvm::Type* Ty) {
- if ((Constraint == "y" || Constraint == "&y") && Ty->isVectorTy()) {
+ bool IsMMXCons = llvm::StringSwitch<bool>(Constraint)
+ .Cases("y", "&y", "^Ym", true)
+ .Default(false);
+ if (IsMMXCons && Ty->isVectorTy()) {
if (cast<llvm::VectorType>(Ty)->getBitWidth() != 64) {
// Invalid MMX constraint
return nullptr;
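For illustration, user-level inline asm that reaches this path (a sketch, compiled with MMX enabled); the patch additionally recognizes the canonicalized "^Ym" spelling of the constraint:

    typedef int v2si __attribute__((vector_size(8)));
    v2si copy(v2si a) {
      v2si r;
      __asm__("movq %1, %0" : "=y"(r) : "y"(a)); // 64-bit vector -> x86_mmx
      return r;
    }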
@@ -886,8 +896,14 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
- if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half)
+ if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) {
+ if (BT->getKind() == BuiltinType::LongDouble) {
+ if (&Context.getTargetInfo().getLongDoubleFormat() ==
+ &llvm::APFloat::x87DoubleExtended())
+ return false;
+ }
return true;
+ }
} else if (const VectorType *VT = Ty->getAs<VectorType>()) {
// vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
// registers specially.
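In source terms, a sketch of the new exclusion: on a target where long double is the x87 80-bit format (e.g. MinGW), such an argument no longer counts as a vectorcall SSE candidate, while double still does:

    void __attribute__((vectorcall)) f(long double ld, double d);
    // 'ld' is ineligible for an XMM register; 'd' remains eligible.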
@@ -1041,7 +1057,8 @@ public:
const llvm::Triple &Triple, const CodeGenOptions &Opts);
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override;
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override;
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
// Darwin uses different dwarf register numbers for EH.
@@ -1070,14 +1087,14 @@ public:
getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
unsigned Sig = (0xeb << 0) | // jmp rel8
(0x06 << 8) | // .+0x08
- ('F' << 16) |
- ('T' << 24);
+ ('v' << 16) |
+ ('2' << 24);
return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
return "movl\t%ebp, %ebp"
- "\t\t## marker for objc_retainAutoreleaseReturnValue";
+ "\t\t// marker for objc_retainAutoreleaseReturnValue";
}
};
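The four packed bytes are, in memory order, 0xEB 0x06 'v' '2': a two-byte short jump over the signature data followed by the new version tag (replacing the old 'F' 'T'). A standalone check of the packed constant:

    #include <cstdio>
    int main() {
      unsigned Sig = (0xeb << 0) | (0x06 << 8) | ('v' << 16) | ('2' << 24);
      std::printf("0x%08x\n", Sig); // prints 0x327606eb
    }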
@@ -1900,7 +1917,6 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
case llvm::Triple::DragonFly:
case llvm::Triple::FreeBSD:
case llvm::Triple::OpenBSD:
- case llvm::Triple::Bitrig:
case llvm::Triple::Win32:
return true;
default:
@@ -1908,9 +1924,11 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
}
}
-void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
- llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const {
+void X86_32TargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const {
+ if (!IsForDefinition)
+ return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
// Get the LLVM function.
@@ -2260,23 +2278,28 @@ public:
llvm::Constant *
getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
- unsigned Sig;
- if (getABIInfo().has64BitPointers())
- Sig = (0xeb << 0) | // jmp rel8
- (0x0a << 8) | // .+0x0c
- ('F' << 16) |
- ('T' << 24);
- else
- Sig = (0xeb << 0) | // jmp rel8
- (0x06 << 8) | // .+0x08
- ('F' << 16) |
- ('T' << 24);
+ unsigned Sig = (0xeb << 0) | // jmp rel8
+ (0x06 << 8) | // .+0x08
+ ('v' << 16) |
+ ('2' << 24);
return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override {
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override {
+ if (!IsForDefinition)
+ return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
+ // Get the LLVM function.
+ auto *Fn = cast<llvm::Function>(GV);
+
+ // Now add the 'alignstack' attribute with a value of 16.
+ llvm::AttrBuilder B;
+ B.addStackAlignmentAttr(16);
+ Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ }
if (FD->hasAttr<AnyX86InterruptAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
Fn->setCallingConv(llvm::CallingConv::X86_INTR);
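Illustrative source-level triggers for the two attribute paths above (hypothetical declarations):

    struct interrupt_frame;
    __attribute__((interrupt)) void isr(struct interrupt_frame *f); // X86_INTR
    __attribute__((force_align_arg_pointer)) void cb(void); // alignstack(16)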
@@ -2323,7 +2346,8 @@ public:
Win32StructABI, NumRegisterParameters, false) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override;
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override;
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
@@ -2351,11 +2375,12 @@ static void addStackProbeSizeTargetAttribute(const Decl *D,
}
}
-void WinX86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
- llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const {
- X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
-
+void WinX86_32TargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const {
+ X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition);
+ if (!IsForDefinition)
+ return;
addStackProbeSizeTargetAttribute(D, GV, CGM);
}
@@ -2366,7 +2391,8 @@ public:
: TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override;
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override;
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 7;
@@ -2395,12 +2421,22 @@ public:
}
};
-void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D,
- llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const {
- TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
-
+void WinX86_64TargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const {
+ TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition);
+ if (!IsForDefinition)
+ return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+ if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
+ // Get the LLVM function.
+ auto *Fn = cast<llvm::Function>(GV);
+
+ // Now add the 'alignstack' attribute with a value of 16.
+ llvm::AttrBuilder B;
+ B.addStackAlignmentAttr(16);
+ Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ }
if (FD->hasAttr<AnyX86InterruptAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
Fn->setCallingConv(llvm::CallingConv::X86_INTR);
@@ -3514,18 +3550,27 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
unsigned FreeSSERegs = IsRegCall ? 16 : 8;
unsigned NeededInt, NeededSSE;
- if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
- !FI.getReturnType()->getTypePtr()->isUnionType()) {
- FI.getReturnInfo() =
- classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE);
- if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
- FreeIntRegs -= NeededInt;
- FreeSSERegs -= NeededSSE;
- } else {
- FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
- }
- } else if (!getCXXABI().classifyReturnType(FI))
- FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ if (!getCXXABI().classifyReturnType(FI)) {
+ if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
+ !FI.getReturnType()->getTypePtr()->isUnionType()) {
+ FI.getReturnInfo() =
+ classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE);
+ if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
+ FreeIntRegs -= NeededInt;
+ FreeSSERegs -= NeededSSE;
+ } else {
+ FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
+ }
+ } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>()) {
+ // A complex long double is returned in memory when the regcall
+ // calling convention is used.
+ const ComplexType *CT = FI.getReturnType()->getAs<ComplexType>();
+ if (getContext().getCanonicalType(CT->getElementType()) ==
+ getContext().LongDoubleTy)
+ FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
+ } else
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ }
// If the return value is indirect, then the hidden argument is consuming one
// integer register.
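For example (a sketch), a regcall function returning _Complex long double now takes the indirect path, i.e. the value comes back through a hidden pointer argument:

    __attribute__((regcall)) _Complex long double get_cld(void);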
@@ -3991,7 +4036,10 @@ Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
namespace {
/// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information.
class PPC32_SVR4_ABIInfo : public DefaultABIInfo {
-bool IsSoftFloatABI;
+ bool IsSoftFloatABI;
+
+ CharUnits getParamTypeAlignment(QualType Ty) const;
+
public:
PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI)
: DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {}
@@ -4013,13 +4061,46 @@ public:
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
};
+}
+CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
+ // Complex types are passed just like their elements
+ if (const ComplexType *CTy = Ty->getAs<ComplexType>())
+ Ty = CTy->getElementType();
+
+ if (Ty->isVectorType())
+ return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16
+ : 4);
+
+ // For single-element float/vector structs, we consider the whole type
+ // to have the same alignment requirements as its single element.
+ const Type *AlignTy = nullptr;
+ if (const Type *EltType = isSingleElementStruct(Ty, getContext())) {
+ const BuiltinType *BT = EltType->getAs<BuiltinType>();
+ if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) ||
+ (BT && BT->isFloatingPoint()))
+ AlignTy = EltType;
+ }
+
+ if (AlignTy)
+ return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4);
+ return CharUnits::fromQuantity(4);
}
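Concretely, my reading of the rule above for 32-bit PPC (illustrative types):

    typedef int v4i __attribute__((vector_size(16)));
    struct SV { v4i v; };    // 16-byte alignment: single 128-bit vector element
    struct SD { double d; }; //  4-byte alignment: float element -> slot size
    // All other parameter types fall back to the 4-byte argument slot.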
// TODO: this implementation is now likely redundant with
// DefaultABIInfo::EmitVAArg.
Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
QualType Ty) const {
+ if (getTarget().getTriple().isOSDarwin()) {
+ auto TI = getContext().getTypeInfoInChars(Ty);
+ TI.second = getParamTypeAlignment(Ty);
+
+ CharUnits SlotSize = CharUnits::fromQuantity(4);
+ return emitVoidPtrVAArg(CGF, VAList, Ty,
+ classifyArgumentType(Ty).isIndirect(), TI, SlotSize,
+ /*AllowHigherAlign=*/true);
+ }
+
const unsigned OverflowLimit = 8;
if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
// TODO: Implement this. For now ignore.
@@ -4860,7 +4941,7 @@ public:
: TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
- return "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue";
+ return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
@@ -4869,6 +4950,22 @@ public:
bool doesReturnSlotInterfereWithArgs() const override { return false; }
};
+
+class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
+public:
+ WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K)
+ : AArch64TargetCodeGenInfo(CGT, K) {}
+
+ void getDependentLibraryOption(llvm::StringRef Lib,
+ llvm::SmallString<24> &Opt) const override {
+ Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
+ }
+
+ void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
+ llvm::SmallString<32> &Opt) const override {
+ Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
+ }
+};
}
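The source-level trigger is the usual one (illustrative): on windows-arm64 a dependent-library request such as

    #pragma comment(lib, "foo")

now becomes the linker directive /DEFAULTLIB:foo.lib in the object file, matching the other Windows targets.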
ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
@@ -5450,7 +5547,7 @@ public:
}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
- return "mov\tr7, r7\t\t@ marker for objc_retainAutoreleaseReturnValue";
+ return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue";
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
@@ -5468,7 +5565,10 @@ public:
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override {
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override {
+ if (!IsForDefinition)
+ return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
@@ -5510,7 +5610,8 @@ public:
: ARMTargetCodeGenInfo(CGT, K) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override;
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override;
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
@@ -5524,8 +5625,11 @@ public:
};
void WindowsARMTargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
- ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const {
+ ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition);
+ if (!IsForDefinition)
+ return;
addStackProbeSizeTargetAttribute(D, GV, CGM);
}
}
@@ -6051,7 +6155,9 @@ public:
: TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
+ CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const override;
+
private:
// Adds a NamedMDNode with F, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
@@ -6105,9 +6211,11 @@ Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
llvm_unreachable("NVPTX does not support varargs");
}
-void NVPTXTargetCodeGenInfo::
-setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const{
+void NVPTXTargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const {
+ if (!IsForDefinition)
+ return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
@@ -6211,7 +6319,7 @@ public:
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
- return true;
+ return false;
}
};
@@ -6543,14 +6651,17 @@ public:
MSP430TargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
+ CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const override;
};
}
-void MSP430TargetCodeGenInfo::setTargetAttributes(const Decl *D,
- llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const {
+void MSP430TargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const {
+ if (!IsForDefinition)
+ return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) {
// Handle 'interrupt' attribute:
@@ -6609,10 +6720,21 @@ public:
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override {
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override {
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
llvm::Function *Fn = cast<llvm::Function>(GV);
+
+ if (FD->hasAttr<MipsLongCallAttr>())
+ Fn->addFnAttr("long-call");
+ else if (FD->hasAttr<MipsShortCallAttr>())
+ Fn->addFnAttr("short-call");
+
+ // Other attributes do not have a meaning for declarations.
+ if (!IsForDefinition)
+ return;
+
if (FD->hasAttr<Mips16Attr>()) {
Fn->addFnAttr("mips16");
}
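Illustrative source attributes feeding this hook; the call-style attributes apply to declarations too, which is why they are handled before the definition-only early return:

    __attribute__((long_call))  void far_helper(void);  // -> "long-call"
    __attribute__((short_call)) void near_helper(void); // -> "short-call"
    __attribute__((mips16))     void m16(void);         // definitions only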
@@ -6974,7 +7096,10 @@ public:
: TargetCodeGenInfo(new DefaultABIInfo(CGT)) { }
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &CGM) const override {
+ CodeGen::CodeGenModule &CGM,
+ ForDefinition_t IsForDefinition) const override {
+ if (!IsForDefinition)
+ return;
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
auto *Fn = cast<llvm::Function>(GV);
@@ -7002,11 +7127,15 @@ public:
: DefaultTargetCodeGenInfo(CGT) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
+ CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const override;
};
void TCETargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const {
+ if (!IsForDefinition)
+ return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
@@ -7302,38 +7431,138 @@ public:
namespace {
class AMDGPUABIInfo final : public DefaultABIInfo {
+private:
+ static const unsigned MaxNumRegsForArgsRet = 16;
+
+ unsigned numRegsForType(QualType Ty) const;
+
+ bool isHomogeneousAggregateBaseType(QualType Ty) const override;
+ bool isHomogeneousAggregateSmallEnough(const Type *Base,
+ uint64_t Members) const override;
+
public:
- explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+ explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
+ DefaultABIInfo(CGT) {}
-private:
- ABIArgInfo classifyArgumentType(QualType Ty) const;
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+ ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
+ ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const;
void computeInfo(CGFunctionInfo &FI) const override;
};
+bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+ return true;
+}
+
+bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
+ const Type *Base, uint64_t Members) const {
+ uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;
+
+ // Homogeneous Aggregates may occupy at most 16 registers.
+ return Members * NumRegs <= MaxNumRegsForArgsRet;
+}
+
+/// Estimate number of registers the type will use when passed in registers.
+unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
+ unsigned NumRegs = 0;
+
+ if (const VectorType *VT = Ty->getAs<VectorType>()) {
+ // Compute from the number of elements. The reported size is based on the
+ // in-memory size, which includes the padding 4th element for 3-vectors.
+ QualType EltTy = VT->getElementType();
+ unsigned EltSize = getContext().getTypeSize(EltTy);
+
+ // 16-bit element vectors should be passed as packed.
+ if (EltSize == 16)
+ return (VT->getNumElements() + 1) / 2;
+
+ unsigned EltNumRegs = (EltSize + 31) / 32;
+ return EltNumRegs * VT->getNumElements();
+ }
+
+ if (const RecordType *RT = Ty->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ assert(!RD->hasFlexibleArrayMember());
+
+ for (const FieldDecl *Field : RD->fields()) {
+ QualType FieldTy = Field->getType();
+ NumRegs += numRegsForType(FieldTy);
+ }
+
+ return NumRegs;
+ }
+
+ return (getContext().getTypeSize(Ty) + 31) / 32;
+}
+
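A standalone mirror of the round-up arithmetic used throughout (illustrative, runnable):

    #include <cassert>
    static unsigned regsFor(unsigned bits) { return (bits + 31) / 32; }
    int main() {
      assert(regsFor(32) == 1); // int, float
      assert(regsFor(64) == 2); // double -> register pair
      assert(regsFor(16) == 1); // a lone half still occupies one register,
      // while a <4 x half> vector packs to (4 + 1) / 2 == 2 registers above.
    }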
void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ llvm::CallingConv::ID CC = FI.getCallingConvention();
+
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
- unsigned CC = FI.getCallingConvention();
- for (auto &Arg : FI.arguments())
- if (CC == llvm::CallingConv::AMDGPU_KERNEL)
- Arg.info = classifyArgumentType(Arg.type);
- else
- Arg.info = DefaultABIInfo::classifyArgumentType(Arg.type);
+ unsigned NumRegsLeft = MaxNumRegsForArgsRet;
+ for (auto &Arg : FI.arguments()) {
+ if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
+ Arg.info = classifyKernelArgumentType(Arg.type);
+ } else {
+ Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
+ }
+ }
}
-/// \brief Classify argument of given type \p Ty.
-ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const {
- llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty));
- if (!StrTy) {
- return DefaultABIInfo::classifyArgumentType(Ty);
+ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
+ if (isAggregateTypeForABI(RetTy)) {
+ // Records with non-trivial destructors/copy-constructors should not be
+ // returned by value.
+ if (!getRecordArgABI(RetTy, getCXXABI())) {
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), RetTy, true))
+ return ABIArgInfo::getIgnore();
+
+ // Lower single-element structs to just return a regular value.
+ if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
+ return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+
+ if (const RecordType *RT = RetTy->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ if (RD->hasFlexibleArrayMember())
+ return DefaultABIInfo::classifyReturnType(RetTy);
+ }
+
+ // Pack aggregates <= 8 bytes into a single VGPR or a pair.
+ uint64_t Size = getContext().getTypeSize(RetTy);
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+ if (Size <= 32)
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+ if (Size <= 64) {
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+ }
+
+ if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
+ return ABIArgInfo::getDirect();
+ }
}
+ // Otherwise just do the default thing.
+ return DefaultABIInfo::classifyReturnType(RetTy);
+}
+
+/// For kernels, all parameters are really passed in a special buffer. It
+/// doesn't make sense to pass anything byval, so everything must be direct.
+ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ // TODO: Can we omit empty structs?
+
+ // Coerce a single-element struct to its element.
- if (StrTy->getNumElements() == 1) {
- return ABIArgInfo::getDirect();
- }
+ if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
+ return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
// individual elements, which confuses the Clover OpenCL backend; therefore we
@@ -7341,30 +7570,102 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const {
return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
}
+ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
+ unsigned &NumRegsLeft) const {
+ assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
+
+ Ty = useFirstFieldIfTransparentUnion(Ty);
+
+ if (isAggregateTypeForABI(Ty)) {
+ // Records with non-trivial destructors/copy-constructors should not be
+ // passed by value.
+ if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
+ return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), Ty, true))
+ return ABIArgInfo::getIgnore();
+
+ // Lower single-element structs to just pass a regular value. TODO: We
+ // could do reasonable-size multiple-element structs too, using getExpand(),
+ // though watch out for things like bitfields.
+ if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
+ return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+
+ if (const RecordType *RT = Ty->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ if (RD->hasFlexibleArrayMember())
+ return DefaultABIInfo::classifyArgumentType(Ty);
+ }
+
+ // Pack aggregates <= 8 bytes into a single VGPR or a pair.
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (Size <= 64) {
+ unsigned NumRegs = (Size + 31) / 32;
+ NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
+
+ if (Size <= 16)
+ return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
+
+ if (Size <= 32)
+ return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+
+ // XXX: Should this be i64 instead, and should the limit increase?
+ llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
+ }
+
+ if (NumRegsLeft > 0) {
+ unsigned NumRegs = numRegsForType(Ty);
+ if (NumRegsLeft >= NumRegs) {
+ NumRegsLeft -= NumRegs;
+ return ABIArgInfo::getDirect();
+ }
+ }
+ }
+
+ // Otherwise just do the default thing.
+ ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
+ if (!ArgInfo.isIndirect()) {
+ unsigned NumRegs = numRegsForType(Ty);
+ NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
+ }
+
+ return ArgInfo;
+}
+
class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
public:
AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
+ CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const override;
unsigned getOpenCLKernelCallingConv() const override;
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::PointerType *T, QualType QT) const override;
- unsigned getASTAllocaAddressSpace() const override {
- return LangAS::FirstTargetAddressSpace +
- getABIInfo().getDataLayout().getAllocaAddrSpace();
- }
- unsigned getGlobalVarAddressSpace(CodeGenModule &CGM,
- const VarDecl *D) const override;
+ LangAS getASTAllocaAddressSpace() const override {
+ return getLangASFromTargetAS(
+ getABIInfo().getDataLayout().getAllocaAddrSpace());
+ }
+ LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
+ const VarDecl *D) const override;
+ llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
+ llvm::LLVMContext &C) const override;
+ llvm::Function *
+ createEnqueuedBlockKernel(CodeGenFunction &CGF,
+ llvm::Function *BlockInvokeFunc,
+ llvm::Value *BlockLiteral) const override;
};
}
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
- const Decl *D,
- llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const {
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M,
+ ForDefinition_t IsForDefinition) const {
+ if (!IsForDefinition)
+ return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
@@ -7441,21 +7742,19 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
llvm::ConstantPointerNull::get(NPT), PT);
}
-unsigned
+LangAS
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const {
assert(!CGM.getLangOpts().OpenCL &&
!(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
"Address space agnostic languages only");
- unsigned DefaultGlobalAS =
- LangAS::FirstTargetAddressSpace +
- CGM.getContext().getTargetAddressSpace(LangAS::opencl_global);
+ LangAS DefaultGlobalAS = getLangASFromTargetAS(
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
if (!D)
return DefaultGlobalAS;
- unsigned AddrSpace = D->getType().getAddressSpace();
- assert(AddrSpace == LangAS::Default ||
- AddrSpace >= LangAS::FirstTargetAddressSpace);
+ LangAS AddrSpace = D->getType().getAddressSpace();
+ assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace));
if (AddrSpace != LangAS::Default)
return AddrSpace;
@@ -7466,6 +7765,26 @@ AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
return DefaultGlobalAS;
}
+llvm::SyncScope::ID
+AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S,
+ llvm::LLVMContext &C) const {
+ StringRef Name;
+ switch (S) {
+ case SyncScope::OpenCLWorkGroup:
+ Name = "workgroup";
+ break;
+ case SyncScope::OpenCLDevice:
+ Name = "agent";
+ break;
+ case SyncScope::OpenCLAllSVMDevices:
+ Name = "";
+ break;
+ case SyncScope::OpenCLSubGroup:
+ Name = "subgroup";
+ }
+ return C.getOrInsertSyncScopeID(Name);
+}
+
//===----------------------------------------------------------------------===//
// SPARC v8 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1.
@@ -8506,7 +8825,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
if (getTarget().getABI() == "darwinpcs")
Kind = AArch64ABIInfo::DarwinPCS;
else if (Triple.isOSWindows())
- Kind = AArch64ABIInfo::Win64;
+ return SetCGInfo(
+ new WindowsAArch64TargetCodeGenInfo(Types, AArch64ABIInfo::Win64));
return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind));
}
@@ -8636,3 +8956,108 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
return SetCGInfo(new SPIRTargetCodeGenInfo(Types));
}
}
+
+/// Create an OpenCL kernel for an enqueued block.
+///
+/// The kernel has the same function type as the block invoke function. Its
+/// name is the name of the block invoke function suffixed with "_kernel".
+/// It simply calls the block invoke function and then returns.
+llvm::Function *
+TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF,
+ llvm::Function *Invoke,
+ llvm::Value *BlockLiteral) const {
+ auto *InvokeFT = Invoke->getFunctionType();
+ llvm::SmallVector<llvm::Type *, 2> ArgTys;
+ for (auto &P : InvokeFT->params())
+ ArgTys.push_back(P);
+ auto &C = CGF.getLLVMContext();
+ std::string Name = Invoke->getName().str() + "_kernel";
+ auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
+ auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
+ &CGF.CGM.getModule());
+ auto IP = CGF.Builder.saveIP();
+ auto *BB = llvm::BasicBlock::Create(C, "entry", F);
+ auto &Builder = CGF.Builder;
+ Builder.SetInsertPoint(BB);
+ llvm::SmallVector<llvm::Value *, 2> Args;
+ for (auto &A : F->args())
+ Args.push_back(&A);
+ Builder.CreateCall(Invoke, Args);
+ Builder.CreateRetVoid();
+ Builder.restoreIP(IP);
+ return F;
+}
+
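In C-like pseudocode (names hypothetical), for an invoke function void block_invoke(void *literal), the emitted wrapper amounts to:

    void block_invoke_kernel(void *literal) {
      block_invoke(literal); // forward every argument unchanged, then return
    }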
+/// Create an OpenCL kernel for an enqueued block.
+///
+/// The type of the first argument (the block literal) is the struct type
+/// of the block literal instead of a pointer type. The block literal is
+/// passed directly by value to the kernel. The kernel allocates a struct of
+/// the same type on the stack, stores the block literal into it, and passes
+/// its pointer to the block invoke function. The kernel carries the
+/// "enqueued-block" function attribute and kernel argument metadata.
+llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
+ CodeGenFunction &CGF, llvm::Function *Invoke,
+ llvm::Value *BlockLiteral) const {
+ auto &Builder = CGF.Builder;
+ auto &C = CGF.getLLVMContext();
+
+ auto *BlockTy = BlockLiteral->getType()->getPointerElementType();
+ auto *InvokeFT = Invoke->getFunctionType();
+ llvm::SmallVector<llvm::Type *, 2> ArgTys;
+ llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
+ llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
+ llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
+ llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
+ llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
+ llvm::SmallVector<llvm::Metadata *, 8> ArgNames;
+
+ ArgTys.push_back(BlockTy);
+ ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
+ AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
+ ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
+ ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
+ AccessQuals.push_back(llvm::MDString::get(C, "none"));
+ ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
+ for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
+ ArgTys.push_back(InvokeFT->getParamType(I));
+ ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
+ AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
+ AccessQuals.push_back(llvm::MDString::get(C, "none"));
+ ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
+ ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
+ ArgNames.push_back(
+ llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
+ }
+ std::string Name = Invoke->getName().str() + "_kernel";
+ auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
+ auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
+ &CGF.CGM.getModule());
+ F->addFnAttr("enqueued-block");
+ auto IP = CGF.Builder.saveIP();
+ auto *BB = llvm::BasicBlock::Create(C, "entry", F);
+ Builder.SetInsertPoint(BB);
+ unsigned BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy);
+ auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
+ BlockPtr->setAlignment(BlockAlign);
+ Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
+ auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
+ llvm::SmallVector<llvm::Value *, 2> Args;
+ Args.push_back(Cast);
+ for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I)
+ Args.push_back(I);
+ Builder.CreateCall(Invoke, Args);
+ Builder.CreateRetVoid();
+ Builder.restoreIP(IP);
+
+ F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
+ F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
+ F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
+ F->setMetadata("kernel_arg_base_type",
+ llvm::MDNode::get(C, ArgBaseTypeNames));
+ F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
+ if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
+ F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));
+
+ return F;
+}
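In the same C-like pseudocode (names hypothetical), the AMDGPU variant differs by taking the literal by value and spilling it before the call:

    void block_invoke_kernel(struct Block literal, void *local0 /* ... */) {
      struct Block tmp = literal; // the alloca + aligned store above
      block_invoke(&tmp, local0); // the invoke function expects a pointer
    }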