Diffstat (limited to 'lib/CodeGen/TargetInfo.cpp')
-rw-r--r-- | lib/CodeGen/TargetInfo.cpp | 488
1 file changed, 404 insertions, 84 deletions
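Much of the diff below threads the new X86_RegCall calling convention (llvm::CallingConv::X86_RegCall) through the x86-32, x86-64 and Win64 ABI classifiers, alongside RenderScript argument coercion, Swift ABI hooks, and AMDGPU/SPIR OpenCL changes. As a rough source-level illustration of the kind of function the regcall paths classify (the struct and function are invented for illustration and are not part of the patch; it assumes a Clang that already accepts the regcall attribute):

// A small homogeneous struct: on x86-64, classifyRegCallStructType() walks
// its fields, asks for two SSE registers (NeededSSE == 2), and keeps the
// whole value out of memory as long as enough registers remain.
struct Vec2 { double x, y; };

// With the regcall attribute, FI.getCallingConvention() in the code below
// is llvm::CallingConv::X86_RegCall for this function.
__attribute__((regcall)) Vec2 scale(Vec2 v, double s) {
  return { v.x * s, v.y * s };
}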
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index aa67e71284aea..391eb53d25002 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -31,6 +31,31 @@ using namespace clang; using namespace CodeGen; +// Helper for coercing an aggregate argument or return value into an integer +// array of the same size (including padding) and alignment. This alternate +// coercion happens only for the RenderScript ABI and can be removed after +// runtimes that rely on it are no longer supported. +// +// RenderScript assumes that the size of the argument / return value in the IR +// is the same as the size of the corresponding qualified type. This helper +// coerces the aggregate type into an array of the same size (including +// padding). This coercion is used in lieu of expansion of struct members or +// other canonical coercions that return a coerced-type of larger size. +// +// Ty - The argument / return value type +// Context - The associated ASTContext +// LLVMContext - The associated LLVMContext +static ABIArgInfo coerceToIntArray(QualType Ty, + ASTContext &Context, + llvm::LLVMContext &LLVMContext) { + // Alignment and Size are measured in bits. + const uint64_t Size = Context.getTypeSize(Ty); + const uint64_t Alignment = Context.getTypeAlign(Ty); + llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Alignment); + const uint64_t NumElements = (Size + Alignment - 1) / Alignment; + return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements)); +} + static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder, llvm::Value *Array, llvm::Value *Value, @@ -375,6 +400,21 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib, unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::C; } + +llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM, + llvm::PointerType *T, QualType QT) const { + return llvm::ConstantPointerNull::get(T); +} + +llvm::Value *TargetCodeGenInfo::performAddrSpaceCast( + CodeGen::CodeGenFunction &CGF, llvm::Value *Src, QualType SrcTy, + QualType DestTy) const { + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. + return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, + CGF.ConvertType(DestTy)); +} + static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); /// isEmptyField - Return true iff a the field is "empty", that is it @@ -932,6 +972,11 @@ public: // scalar registers. return occupiesMoreThan(CGT, scalars, /*total*/ 3); } + + bool isSwiftErrorInRegister() const override { + // x86-32 lowering does not support passing swifterror in a register. + return false; + } }; class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { @@ -1203,7 +1248,8 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, const Type *Base = nullptr; uint64_t NumElts = 0; - if (State.CC == llvm::CallingConv::X86_VectorCall && + if ((State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) && isHomogeneousAggregate(RetTy, Base, NumElts)) { // The LLVM struct type for such an aggregate should lower properly. 
return ABIArgInfo::getDirect(); @@ -1417,7 +1463,8 @@ bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, return true; if (State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall) { + State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) { if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs) NeedsPadding = true; @@ -1435,7 +1482,8 @@ bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const { return false; if (State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall) { + State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) { if (getContext().getTypeSize(Ty) > 32) return false; @@ -1468,7 +1516,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, // to other targets. const Type *Base = nullptr; uint64_t NumElts = 0; - if (State.CC == llvm::CallingConv::X86_VectorCall && + if ((State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall) && isHomogeneousAggregate(Ty, Base, NumElts)) { if (State.FreeSSERegs >= NumElts) { State.FreeSSERegs -= NumElts; @@ -1514,7 +1563,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( State.CC == llvm::CallingConv::X86_FastCall || - State.CC == llvm::CallingConv::X86_VectorCall, + State.CC == llvm::CallingConv::X86_VectorCall || + State.CC == llvm::CallingConv::X86_RegCall, PaddingType); return getIndirectResult(Ty, true, State); @@ -1565,7 +1615,10 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { State.FreeSSERegs = 6; } else if (FI.getHasRegParm()) State.FreeRegs = FI.getRegParm(); - else + else if (State.CC == llvm::CallingConv::X86_RegCall) { + State.FreeRegs = 5; + State.FreeSSERegs = 8; + } else State.FreeRegs = DefaultNumRegisterParameters; if (!getCXXABI().classifyReturnType(FI)) { @@ -1906,12 +1959,16 @@ class X86_64ABIInfo : public SwiftABIInfo { ABIArgInfo classifyReturnType(QualType RetTy) const; - ABIArgInfo classifyArgumentType(QualType Ty, - unsigned freeIntRegs, - unsigned &neededInt, - unsigned &neededSSE, + ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs, + unsigned &neededInt, unsigned &neededSSE, bool isNamedArg) const; + ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt, + unsigned &NeededSSE) const; + + ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, + unsigned &NeededSSE) const; + bool IsIllegalVectorType(QualType Ty) const; /// The 0.98 ABI revision clarified a lot of ambiguities, @@ -1974,13 +2031,16 @@ public: bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } + bool isSwiftErrorInRegister() const override { + return true; + } }; /// WinX86_64ABIInfo - The Windows X86_64 ABI information. 
-class WinX86_64ABIInfo : public ABIInfo { +class WinX86_64ABIInfo : public SwiftABIInfo { public: WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT) - : ABIInfo(CGT), + : SwiftABIInfo(CGT), IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {} void computeInfo(CGFunctionInfo &FI) const override; @@ -1999,6 +2059,16 @@ public: return isX86VectorCallAggregateSmallEnough(NumMembers); } + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type *> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } + + bool isSwiftErrorInRegister() const override { + return true; + } + private: ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType) const; @@ -2315,13 +2385,13 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Current = SSE; } else if (k == BuiltinType::LongDouble) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); - if (LDF == &llvm::APFloat::IEEEquad) { + if (LDF == &llvm::APFloat::IEEEquad()) { Lo = SSE; Hi = SSEUp; - } else if (LDF == &llvm::APFloat::x87DoubleExtended) { + } else if (LDF == &llvm::APFloat::x87DoubleExtended()) { Lo = X87; Hi = X87Up; - } else if (LDF == &llvm::APFloat::IEEEdouble) { + } else if (LDF == &llvm::APFloat::IEEEdouble()) { Current = SSE; } else llvm_unreachable("unexpected long double representation!"); @@ -2440,11 +2510,11 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Lo = Hi = SSE; } else if (ET == getContext().LongDoubleTy) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); - if (LDF == &llvm::APFloat::IEEEquad) + if (LDF == &llvm::APFloat::IEEEquad()) Current = Memory; - else if (LDF == &llvm::APFloat::x87DoubleExtended) + else if (LDF == &llvm::APFloat::x87DoubleExtended()) Current = ComplexX87; - else if (LDF == &llvm::APFloat::IEEEdouble) + else if (LDF == &llvm::APFloat::IEEEdouble()) Lo = Hi = SSE; else llvm_unreachable("unexpected long double representation!"); @@ -2466,8 +2536,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, uint64_t Size = getContext().getTypeSize(Ty); // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger - // than four eightbytes, ..., it has class MEMORY. - if (Size > 256) + // than eight eightbytes, ..., it has class MEMORY. + if (Size > 512) return; // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned @@ -2486,7 +2556,9 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, // The only case a 256-bit wide vector could be used is when the array // contains a single 256-bit element. Since Lo and Hi logic isn't extended // to work for sizes wider than 128, early check and fallback to memory. - if (Size > 128 && EltSize != 256) + // + if (Size > 128 && + (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel))) return; for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) { @@ -2507,8 +2579,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, uint64_t Size = getContext().getTypeSize(Ty); // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger - // than four eightbytes, ..., it has class MEMORY. - if (Size > 256) + // than eight eightbytes, ..., it has class MEMORY. + if (Size > 512) return; // AMD64-ABI 3.2.3p2: Rule 2. 
If a C++ object has either a non-trivial @@ -2561,6 +2633,10 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); bool BitField = i->isBitField(); + // Ignore padding bit-fields. + if (BitField && i->isUnnamedBitfield()) + continue; + // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than // four eightbytes, or it contains unaligned fields, it has class MEMORY. // @@ -2568,7 +2644,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, // contains a single 256-bit element. Since Lo and Hi logic isn't extended // to work for sizes wider than 128, early check and fallback to memory. // - if (Size > 128 && getContext().getTypeSize(i->getType()) != 256) { + if (Size > 128 && (Size != getContext().getTypeSize(i->getType()) || + Size > getNativeVectorSizeForAVXABI(AVXLevel))) { Lo = Memory; postMerge(Size, Lo, Hi); return; @@ -2592,10 +2669,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, // structure to be passed in memory even if unaligned, and // therefore they can straddle an eightbyte. if (BitField) { - // Ignore padding bit-fields. - if (i->isUnnamedBitfield()) - continue; - + assert(!i->isUnnamedBitfield()); uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx); uint64_t Size = i->getBitWidthValue(getContext()); @@ -2723,7 +2797,7 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { // We couldn't find the preferred IR vector type for 'Ty'. uint64_t Size = getContext().getTypeSize(Ty); - assert((Size == 128 || Size == 256) && "Invalid type found!"); + assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); // Return a LLVM IR vector type based on the size of 'Ty'. return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()), @@ -3247,22 +3321,94 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType( return ABIArgInfo::getDirect(ResType); } +ABIArgInfo +X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, + unsigned &NeededSSE) const { + auto RT = Ty->getAs<RecordType>(); + assert(RT && "classifyRegCallStructType only valid with struct types"); + + if (RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectReturnResult(Ty); + + // Sum up bases + if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) { + if (CXXRD->isDynamicClass()) { + NeededInt = NeededSSE = 0; + return getIndirectReturnResult(Ty); + } + + for (const auto &I : CXXRD->bases()) + if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE) + .isIndirect()) { + NeededInt = NeededSSE = 0; + return getIndirectReturnResult(Ty); + } + } + + // Sum up members + for (const auto *FD : RT->getDecl()->fields()) { + if (FD->getType()->isRecordType() && !FD->getType()->isUnionType()) { + if (classifyRegCallStructTypeImpl(FD->getType(), NeededInt, NeededSSE) + .isIndirect()) { + NeededInt = NeededSSE = 0; + return getIndirectReturnResult(Ty); + } + } else { + unsigned LocalNeededInt, LocalNeededSSE; + if (classifyArgumentType(FD->getType(), UINT_MAX, LocalNeededInt, + LocalNeededSSE, true) + .isIndirect()) { + NeededInt = NeededSSE = 0; + return getIndirectReturnResult(Ty); + } + NeededInt += LocalNeededInt; + NeededSSE += LocalNeededSSE; + } + } + + return ABIArgInfo::getDirect(); +} + +ABIArgInfo X86_64ABIInfo::classifyRegCallStructType(QualType Ty, + unsigned &NeededInt, + unsigned &NeededSSE) const { + + NeededInt = 0; + NeededSSE = 0; + + return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE); +} + void 
X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { - if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + bool IsRegCall = FI.getCallingConvention() == llvm::CallingConv::X86_RegCall; // Keep track of the number of assigned registers. - unsigned freeIntRegs = 6, freeSSERegs = 8; + unsigned FreeIntRegs = IsRegCall ? 11 : 6; + unsigned FreeSSERegs = IsRegCall ? 16 : 8; + unsigned NeededInt, NeededSSE; + + if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && + !FI.getReturnType()->getTypePtr()->isUnionType()) { + FI.getReturnInfo() = + classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE); + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; + } else { + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + } + } else if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); // If the return value is indirect, then the hidden argument is consuming one // integer register. if (FI.getReturnInfo().isIndirect()) - --freeIntRegs; + --FreeIntRegs; // The chain argument effectively gives us another free register. if (FI.isChainCall()) - ++freeIntRegs; + ++FreeIntRegs; unsigned NumRequiredArgs = FI.getNumRequiredArgs(); // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers @@ -3272,19 +3418,21 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { it != ie; ++it, ++ArgNo) { bool IsNamedArg = ArgNo < NumRequiredArgs; - unsigned neededInt, neededSSE; - it->info = classifyArgumentType(it->type, freeIntRegs, neededInt, - neededSSE, IsNamedArg); + if (IsRegCall && it->type->isStructureOrClassType()) + it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE); + else + it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt, + NeededSSE, IsNamedArg); // AMD64-ABI 3.2.3p3: If there are no registers available for any // eightbyte of an argument, the whole argument is passed on the // stack. If registers have already been assigned for some // eightbytes of such an argument, the assignments get reverted. - if (freeIntRegs >= neededInt && freeSSERegs >= neededSSE) { - freeIntRegs -= neededInt; - freeSSERegs -= neededSSE; + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; } else { - it->info = getIndirectResult(it->type, freeIntRegs); + it->info = getIndirectResult(it->type, FreeIntRegs); } } } @@ -3426,15 +3574,17 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr; // Copy the first element. - llvm::Value *V = - CGF.Builder.CreateDefaultAlignedLoad( - CGF.Builder.CreateBitCast(RegLoAddr, PTyLo)); + // FIXME: Our choice of alignment here and below is probably pessimistic. + llvm::Value *V = CGF.Builder.CreateAlignedLoad( + TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo), + CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyLo))); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero())); // Copy the second element. 
- V = CGF.Builder.CreateDefaultAlignedLoad( - CGF.Builder.CreateBitCast(RegHiAddr, PTyHi)); + V = CGF.Builder.CreateAlignedLoad( + TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi), + CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyHi))); CharUnits Offset = CharUnits::fromQuantity( getDataLayout().getStructLayout(ST)->getElementOffset(1)); CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1, Offset)); @@ -3597,7 +3747,7 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, // passes them indirectly through memory. if (IsMingw64 && BT && BT->getKind() == BuiltinType::LongDouble) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); - if (LDF == &llvm::APFloat::x87DoubleExtended) + if (LDF == &llvm::APFloat::x87DoubleExtended()) return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); } @@ -3607,21 +3757,44 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { bool IsVectorCall = FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall; + bool IsRegCall = FI.getCallingConvention() == llvm::CallingConv::X86_RegCall; + + unsigned FreeSSERegs = 0; + if (IsVectorCall) { + // We can use up to 4 SSE return registers with vectorcall. + FreeSSERegs = 4; + } else if (IsRegCall) { + // RegCall gives us 16 SSE registers. + FreeSSERegs = 16; + } - // We can use up to 4 SSE return registers with vectorcall. - unsigned FreeSSERegs = IsVectorCall ? 4 : 0; if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true); - // We can use up to 6 SSE register parameters with vectorcall. - FreeSSERegs = IsVectorCall ? 6 : 0; + if (IsVectorCall) { + // We can use up to 6 SSE register parameters with vectorcall. + FreeSSERegs = 6; + } else if (IsRegCall) { + FreeSSERegs = 16; + } + for (auto &I : FI.arguments()) I.info = classify(I.type, FreeSSERegs, false); } Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, + + bool IsIndirect = false; + + // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is + // not 1, 2, 4, or 8 bytes, must be passed by reference." + if (isAggregateTypeForABI(Ty) || Ty->isMemberPointerType()) { + uint64_t Width = getContext().getTypeSize(Ty); + IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width); + } + + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, CGF.getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(8), /*allowHigherAlign*/ false); @@ -3859,6 +4032,7 @@ private: static const unsigned GPRBits = 64; ABIKind Kind; bool HasQPX; + bool IsSoftFloatABI; // A vector of float or double will be promoted to <4 x f32> or <4 x f64> and // will be passed in a QPX register. 
@@ -3889,8 +4063,10 @@ private: } public: - PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX) - : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {} + PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX, + bool SoftFloatABI) + : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX), + IsSoftFloatABI(SoftFloatABI) {} bool isPromotableTypeForABI(QualType Ty) const; CharUnits getParamTypeAlignment(QualType Ty) const; @@ -3938,8 +4114,10 @@ class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { public: PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, - PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX) - : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX)) {} + PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX, + bool SoftFloatABI) + : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX, + SoftFloatABI)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. @@ -4157,8 +4335,11 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { if (BT->getKind() == BuiltinType::Float || BT->getKind() == BuiltinType::Double || - BT->getKind() == BuiltinType::LongDouble) + BT->getKind() == BuiltinType::LongDouble) { + if (IsSoftFloatABI) + return false; return true; + } } if (const VectorType *VT = Ty->getAs<VectorType>()) { if (getContext().getTypeSize(VT) == 128 || IsQPXVectorTy(Ty)) @@ -4373,14 +4554,17 @@ PPC64_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, // 32-63: fp0-31, the 8-byte floating-point registers AssignToArrayRange(Builder, Address, Eight8, 32, 63); - // 64-76 are various 4-byte special-purpose registers: + // 64-67 are various 8-byte special-purpose registers: // 64: mq // 65: lr // 66: ctr // 67: ap + AssignToArrayRange(Builder, Address, Eight8, 64, 67); + + // 68-76 are various 4-byte special-purpose registers: // 68-75 cr0-7 // 76: xer - AssignToArrayRange(Builder, Address, Four8, 64, 76); + AssignToArrayRange(Builder, Address, Four8, 68, 76); // 77-108: v0-31, the 16-byte vector registers AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); @@ -4390,7 +4574,10 @@ PPC64_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, // 111: spe_acc // 112: spefscr // 113: sfp - AssignToArrayRange(Builder, Address, Four8, 109, 113); + // 114: tfhar + // 115: tfiar + // 116: texasr + AssignToArrayRange(Builder, Address, Eight8, 109, 116); return false; } @@ -4467,6 +4654,9 @@ private: bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } + bool isSwiftErrorInRegister() const override { + return true; + } }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -4551,6 +4741,11 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { // Aggregates <= 16 bytes are passed directly in registers or on the stack. uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 128) { + // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of + // same size and alignment. + if (getTarget().isRenderScriptTarget()) { + return coerceToIntArray(Ty, getContext(), getVMContext()); + } unsigned Alignment = getContext().getTypeAlign(Ty); Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes @@ -4596,6 +4791,11 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const { // Aggregates <= 16 bytes are returned directly in registers or on the stack. 
uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 128) { + // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of + // same size and alignment. + if (getTarget().isRenderScriptTarget()) { + return coerceToIntArray(RetTy, getContext(), getVMContext()); + } unsigned Alignment = getContext().getTypeAlign(RetTy); Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes @@ -5010,6 +5210,9 @@ private: bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } + bool isSwiftErrorInRegister() const override { + return true; + } }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -5286,6 +5489,12 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, /*Realign=*/TyAlign > ABIAlign); } + // On RenderScript, coerce Aggregates <= 64 bytes to an integer array of + // same size and alignment. + if (getTarget().isRenderScriptTarget()) { + return coerceToIntArray(Ty, getContext(), getVMContext()); + } + // Otherwise, pass by coercing to a structure of the appropriate size. llvm::Type* ElemTy; unsigned SizeRegs; @@ -5467,6 +5676,11 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, // are returned indirectly. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 32) { + // On RenderScript, coerce Aggregates <= 4 bytes to an integer array of + // same size and alignment. + if (getTarget().isRenderScriptTarget()) { + return coerceToIntArray(RetTy, getContext(), getVMContext()); + } if (getDataLayout().isBigEndian()) // Return in 32 bit integer integer type (as if loaded by LDR, AAPCS 5.4) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); @@ -5767,6 +5981,9 @@ public: bool asReturnValue) const override { return occupiesMoreThan(CGT, scalars, /*total*/ 4); } + bool isSwiftErrorInRegister() const override { + return true; + } }; class SystemZTargetCodeGenInfo : public TargetCodeGenInfo { @@ -6825,45 +7042,138 @@ public: namespace { +class AMDGPUABIInfo final : public DefaultABIInfo { +public: + explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + +private: + ABIArgInfo classifyArgumentType(QualType Ty) const; + + void computeInfo(CGFunctionInfo &FI) const override; +}; + +void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + + unsigned CC = FI.getCallingConvention(); + for (auto &Arg : FI.arguments()) + if (CC == llvm::CallingConv::AMDGPU_KERNEL) + Arg.info = classifyArgumentType(Arg.type); + else + Arg.info = DefaultABIInfo::classifyArgumentType(Arg.type); +} + +/// \brief Classify argument of given type \p Ty. +ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const { + llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty)); + if (!StrTy) { + return DefaultABIInfo::classifyArgumentType(Ty); + } + + // Coerce single element structs to its element. + if (StrTy->getNumElements() == 1) { + return ABIArgInfo::getDirect(); + } + + // If we set CanBeFlattened to true, CodeGen will expand the struct to its + // individual elements, which confuses the Clover OpenCL backend; therefore we + // have to set it to false here. Other args of getDirect() are just defaults. 
+ return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); +} + class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + : TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; -}; + llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, + llvm::PointerType *T, QualType QT) const override; +}; } +static void appendOpenCLVersionMD (CodeGen::CodeGenModule &CGM); + void AMDGPUTargetCodeGenInfo::setTargetAttributes( - const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const { + const Decl *D, + llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const { const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; - if (const auto Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) { - llvm::Function *F = cast<llvm::Function>(GV); - uint32_t NumVGPR = Attr->getNumVGPR(); - if (NumVGPR != 0) - F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR)); + llvm::Function *F = cast<llvm::Function>(GV); + + if (const auto *Attr = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) { + unsigned Min = Attr->getMin(); + unsigned Max = Attr->getMax(); + + if (Min != 0) { + assert(Min <= Max && "Min must be less than or equal Max"); + + std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); + F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); + } else + assert(Max == 0 && "Max must be zero"); } - if (const auto Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) { - llvm::Function *F = cast<llvm::Function>(GV); + if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) { + unsigned Min = Attr->getMin(); + unsigned Max = Attr->getMax(); + + if (Min != 0) { + assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); + + std::string AttrVal = llvm::utostr(Min); + if (Max != 0) + AttrVal = AttrVal + "," + llvm::utostr(Max); + F->addFnAttr("amdgpu-waves-per-eu", AttrVal); + } else + assert(Max == 0 && "Max must be zero"); + } + + if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) { unsigned NumSGPR = Attr->getNumSGPR(); + if (NumSGPR != 0) - F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR)); + F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR)); } -} + if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) { + uint32_t NumVGPR = Attr->getNumVGPR(); + + if (NumVGPR != 0) + F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); + } + + appendOpenCLVersionMD(M); +} unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::AMDGPU_KERNEL; } +// Currently LLVM assumes null pointers always have value 0, +// which results in incorrectly transformed IR. Therefore, instead of +// emitting null pointers in private and local address spaces, a null +// pointer in generic address space is emitted which is casted to a +// pointer in local or private address space. 
+llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer( + const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT, + QualType QT) const { + if (CGM.getContext().getTargetNullPointerValue(QT) == 0) + return llvm::ConstantPointerNull::get(PT); + + auto &Ctx = CGM.getContext(); + auto NPT = llvm::PointerType::get(PT->getElementType(), + Ctx.getTargetAddressSpace(LangAS::opencl_generic)); + return llvm::ConstantExpr::getAddrSpaceCast( + llvm::ConstantPointerNull::get(NPT), PT); +} + //===----------------------------------------------------------------------===// // SPARC v8 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. @@ -7303,7 +7613,7 @@ class FieldEncoding { std::string Enc; public: FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {} - StringRef str() {return Enc.c_str();} + StringRef str() { return Enc; } bool operator<(const FieldEncoding &rhs) const { if (HasName != rhs.HasName) return HasName; return Enc < rhs.Enc; @@ -7469,7 +7779,7 @@ StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) { E.State = IncompleteUsed; ++IncompleteUsedCount; } - return E.Str.c_str(); + return E.Str; } /// The XCore ABI includes a type information section that communicates symbol @@ -7525,11 +7835,20 @@ void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the // opencl.spir.version named metadata. llvm::Metadata *SPIRVerElts[] = { - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 2)), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 0))}; + llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, (CGM.getLangOpts().OpenCLVersion / 100 > 1) ? 0 : 2))}; llvm::NamedMDNode *SPIRVerMD = M.getOrInsertNamedMetadata("opencl.spir.version"); SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); + appendOpenCLVersionMD(CGM); +} + +static void appendOpenCLVersionMD(CodeGen::CodeGenModule &CGM) { + llvm::LLVMContext &Ctx = CGM.getModule().getContext(); + llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); + llvm::Module &M = CGM.getModule(); // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the // opencl.ocl.version named metadata node. 
llvm::Metadata *OCLVerElts[] = { @@ -7882,10 +8201,6 @@ static bool getTypeString(SmallStringEnc &Enc, const Decl *D, // Driver code //===----------------------------------------------------------------------===// -const llvm::Triple &CodeGenModule::getTriple() const { - return getTarget().getTriple(); -} - bool CodeGenModule::supportsCOMDAT() const { return getTriple().supportsCOMDAT(); } @@ -7964,8 +8279,10 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (getTarget().getABI() == "elfv2") Kind = PPC64_SVR4_ABIInfo::ELFv2; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); + return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, + IsSoftFloat)); } else return SetCGInfo(new PPC64TargetCodeGenInfo(Types)); case llvm::Triple::ppc64le: { @@ -7974,8 +8291,10 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (getTarget().getABI() == "elfv1" || getTarget().getABI() == "elfv1-qpx") Kind = PPC64_SVR4_ABIInfo::ELFv1; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); + return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, + IsSoftFloat)); } case llvm::Triple::nvptx: @@ -7991,6 +8310,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { } case llvm::Triple::tce: + case llvm::Triple::tcele: return SetCGInfo(new TCETargetCodeGenInfo(Types)); case llvm::Triple::x86: { |
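The new coerceToIntArray() helper used by the RenderScript changes above rounds the aggregate's size (padding included) up to a whole number of alignment-sized integers and passes the value as that array. A minimal standalone sketch of the same arithmetic (the helper name and the worked cases are illustrative, not taken from the patch):

#include <cassert>
#include <cstdint>

// SizeBits and AlignBits play the roles of Context.getTypeSize(Ty) and
// Context.getTypeAlign(Ty); the result is the element count N of the
// [N x iAlignBits] coercion type.
static uint64_t numCoercedElements(uint64_t SizeBits, uint64_t AlignBits) {
  return (SizeBits + AlignBits - 1) / AlignBits; // round up
}

int main() {
  // struct { char c; int i; } with a 32-bit int: 64 bits of storage
  // (including padding), 32-bit alignment -> coerced to [2 x i32].
  assert(numCoercedElements(64, 32) == 2);
  // struct { char c[3]; }: 24 bits, 8-bit alignment -> [3 x i8].
  assert(numCoercedElements(24, 8) == 3);
  return 0;
}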
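The WinX86_64ABIInfo::EmitVAArg change above encodes the MS x64 rule that any argument which is not exactly 1, 2, 4, or 8 bytes is passed by reference, so va_arg for such a type must first load a pointer. A self-contained restatement of that predicate with two worked cases (the helper name is invented for illustration):

#include <cstdint>

// Mirrors `Width > 64 || !llvm::isPowerOf2_64(Width)` from the patch, where
// Width is the bit size returned by getContext().getTypeSize(Ty).
static bool passedByReferenceOnWin64(uint64_t WidthBits) {
  const bool IsPow2 = WidthBits != 0 && (WidthBits & (WidthBits - 1)) == 0;
  return WidthBits > 64 || !IsPow2;
}

int main() {
  bool big = passedByReferenceOnWin64(96); // 12-byte struct -> by reference
  bool reg = passedByReferenceOnWin64(64); // 8-byte struct  -> in a register
  return (big && !reg) ? 0 : 1;
}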